Exemple #1
0
GtHcrDecoder *gt_hcr_decoder_new(const char *name, GtAlphabet *alpha,
                                 bool descs, GtTimer *timer, GtError *err)
{
    GtHcrDecoder *hcr_dec;
    int had_err = 0;

    gt_error_check(err);
    if (timer != NULL)
        gt_timer_show_progress(timer, "initialize hcr decoder", stdout);

    hcr_dec = gt_malloc(sizeof (GtHcrDecoder));

    if (descs) {
        hcr_dec->encdesc = gt_encdesc_load(name, err);
        if (gt_error_is_set(err)) {
            had_err = -1;
        }
    }
    else
        hcr_dec->encdesc = NULL;

    if (!had_err) {
        hcr_dec->seq_dec = hcr_seq_decoder_new(alpha, name, err);
        if (!gt_error_is_set(err))
            return hcr_dec;
    }
    gt_hcr_decoder_delete(hcr_dec);
    return NULL;
}
static int gt_readjoiner_assembly_build_graph(
    GtReadjoinerAssemblyArguments *arguments, GtStrgraph **strgraph,
    GtEncseq *reads, const char *readset, bool eqlen, GtUword rlen,
    GtUword nreads, GtBitsequence *contained, GtLogger *default_logger,
    GtLogger *verbose_logger, GtTimer *timer, GtError *err)
{
  int had_err = 0;
  *strgraph = gt_strgraph_new(nreads);

  if (arguments->minmatchlength > 0)
    gt_logger_log(verbose_logger, "SPM length cutoff = %u",
        arguments->minmatchlength);

  had_err = gt_readjoiner_assembly_count_spm(readset, eqlen,
      arguments->minmatchlength, arguments->nspmfiles, *strgraph, contained,
      default_logger, err);
  gt_readjoiner_assembly_show_current_space("(edges counted)");
  if (gt_showtime_enabled())
    gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_BUILDSG, stdout);
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_BUILDSG);

  if (had_err == 0)
  {
    gt_assert((eqlen && rlen > 0 && reads == NULL) ||
        (!eqlen && rlen == 0 && reads != NULL));
    gt_strgraph_allocate_graph(*strgraph, rlen, reads);
    gt_readjoiner_assembly_show_current_space("(graph allocated)");
    had_err = gt_strgraph_load_spm_from_file(*strgraph,
        (GtUword)arguments->minmatchlength, arguments->redtrans,
        contained, readset, arguments->nspmfiles,
        GT_READJOINER_SUFFIX_SPMLIST, err);
  }
  return had_err;
}
Exemple #3
0
int gt_hcr_decoder_decode_range(GtHcrDecoder *hcr_dec, const char *name,
                                GtUword start, GtUword end,
                                GtTimer *timer, GtError *err)
{
    char qual[BUFSIZ] = {0},
                        seq[BUFSIZ] = {0};
    GtStr *desc = gt_str_new();
    int had_err = 0;
    GtUword cur_width,
            cur_read;
    size_t i;
    FILE *output;
    GT_UNUSED GtHcrSeqDecoder *seq_dec;

    gt_error_check(err);
    gt_assert(hcr_dec && name);
    seq_dec = hcr_dec->seq_dec;
    gt_assert(start <= end);
    gt_assert(start < seq_dec->num_of_reads && end < seq_dec->num_of_reads);
    if (timer != NULL)
        gt_timer_show_progress(timer, "decode hcr", stdout);
    output = gt_fa_fopen_with_suffix(name, HCRFILEDECODEDSUFFIX, "w", err);
    if (output == NULL)
        had_err = -1;

    for (cur_read = start; had_err == 0 && cur_read <= end; cur_read++) {
        if (gt_hcr_decoder_decode(hcr_dec, cur_read, seq, qual, desc, err) != 0)
            had_err = -1;
        else {
            gt_xfputc(HCR_DESCSEPSEQ, output);
            if (hcr_dec->encdesc != NULL)
                gt_xfputs(gt_str_get(desc), output);
            else
                fprintf(output, ""GT_WU"", cur_read);
            gt_xfputc('\n', output);
            for (i = 0, cur_width = 0; i < strlen(seq); i++, cur_width++) {
                if (cur_width == HCR_LINEWIDTH) {
                    cur_width = 0;
                    gt_xfputc('\n', output);
                }
                gt_xfputc(seq[i], output);
            }
            gt_xfputc('\n', output);
            gt_xfputc(HCR_DESCSEPQUAL, output);
            gt_xfputc('\n', output);
            for (i = 0, cur_width = 0; i < strlen(qual); i++, cur_width++) {
                if (cur_width == HCR_LINEWIDTH) {
                    cur_width = 0;
                    gt_xfputc('\n', output);
                }
                gt_xfputc(qual[i], output);
            }
            gt_xfputc('\n', output);
        }
    }
    gt_fa_xfclose(output);
    gt_str_delete(desc);
    return had_err;
}
static int gt_compreads_compress_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments, GtError *err)
{
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrEncoder *hcre = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha)
      had_err = 1;
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha)
      had_err = 1;
  }
  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "encoding", stdout);
    hcre = gt_hcr_encoder_new(arguments->files, alpha, arguments->descs,
                              arguments->qrng, timer, err);
    if (!hcre)
      had_err = 1;
    else {
      if (arguments->pagewise)
        gt_hcr_encoder_set_sampling_page(hcre);
      else if (arguments->regular)
        gt_hcr_encoder_set_sampling_regular(hcre);

      gt_hcr_encoder_set_sampling_rate(hcre, arguments->srate);

      if (gt_hcr_encoder_encode(hcre, gt_str_get(arguments->name),
                                timer, err) != 0)
        had_err = 1;
    }
    gt_hcr_encoder_delete(hcre);
  }
  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  return had_err;
}
Exemple #5
0
unsigned long gt_uint64hashtable_partialsums(GtUint64hashtable *table,
                                             GtTimer *timer)
{
  size_t idx, next = 0;
  unsigned long psum, maxsize = 0;

  table->sortedhspace = gt_malloc((size_t) table->allentries *
                                  sizeof (*table->sortedhspace));
  if (timer != NULL)
  {
    gt_timer_show_progress(timer, "sorting the hashkeys",stdout);
  }
  for (idx = 0; idx < table->alloc; idx++)
  {
    if (table->hspace[idx].count > 0)
    {
      gt_assert(next < (size_t) table->allentries);
      table->sortedhspace[next++] = idx;
      if (maxsize < table->hspace[idx].count)
      {
        maxsize = table->hspace[idx].count;
      }
    }
  }
  gt_qsort_r(table->sortedhspace,next,sizeof(*table->sortedhspace),
             table->hspace,compareGtUint64hashstoredvalue);
  gt_assert(next > 0);
  if (table->zero_occurs)
  {
    table->hspace[table->sortedhspace[0]].count += table->zero_count;
  }
  if (timer != NULL)
  {
    gt_timer_show_progress(timer, "computing partial sums",stdout);
  }
  for (idx = (size_t) 1; idx < next; idx++)
  {
    table->hspace[table->sortedhspace[idx]].count +=
      table->hspace[table->sortedhspace[idx-1]].count;
  }
  psum = table->hspace[table->sortedhspace[next-1]].count;
  gt_free(table->sortedhspace);
  return psum;
}
Exemple #6
0
static int hcr_write_seq_qual_data(const char *name, GtHcrEncoder *hcr_enc,
                                   GtTimer *timer, GtError *err)
{
    int had_err = 0;
    FILE *fp;
    GtUword dummy = 0;
    GtWord pos;

    gt_error_check(err);

    fp = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "wb", err);
    if (fp == NULL)
        had_err = -1;

    if (!had_err) {
        if (timer != NULL)
            gt_timer_show_progress(timer, "write sequences and qualities encoding",
                                   stdout);
        hcr_write_file_info(fp, hcr_enc);

        had_err = hcr_write_seqdistrtab(fp, hcr_enc);

        if (!had_err) {
            bool is_not_at_pageborder;

            pos = ftell(fp);
            gt_xfwrite_one(&dummy, fp);

            is_not_at_pageborder = (ftell(fp) % hcr_enc->pagesize) != 0;

            if (is_not_at_pageborder)
                hcr_enc->seq_encoder->start_of_encoding =
                    (ftell(fp) / hcr_enc->pagesize + 1) * hcr_enc->pagesize;
            else
                hcr_enc->seq_encoder->start_of_encoding = ftell(fp);

            if (hcr_enc->page_sampling)
                hcr_enc->seq_encoder->sampling =
                    gt_sampling_new_page(hcr_enc->sampling_rate,
                                         (off_t) hcr_enc->seq_encoder->start_of_encoding);
            else if (hcr_enc->regular_sampling)
                hcr_enc->seq_encoder->sampling =
                    gt_sampling_new_regular(hcr_enc->sampling_rate,
                                            (off_t) hcr_enc->seq_encoder->start_of_encoding);

            had_err = hcr_write_seqs(fp, hcr_enc, err);
        }
        if (!had_err) {
            gt_assert(fp);
            gt_xfseek(fp, pos, SEEK_SET);
            gt_xfwrite_one(&hcr_enc->seq_encoder->startofsamplingtab, fp);
        }
        gt_fa_xfclose(fp);
    }
    return 0;
}
static int gt_readjoiner_assembly_paths2seq(const char *readset,
    GtUword lengthcutoff, bool showpaths, bool astat,
    double coverage, bool load_copynum, GtUword buffersize,
    GtLogger *default_logger, GtTimer **timer, GtError *err)
{
  int had_err;
  GtEncseqLoader *el = gt_encseq_loader_new();
  GtEncseq *reads;

  if (gt_showtime_enabled())
  {
    gt_assert(timer != NULL);
    if (*timer == NULL) /* paths2seq */
    {
      *timer = gt_timer_new_with_progress_description(
          GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);
      gt_timer_show_cpu_time_by_progress(*timer);
      gt_timer_start(*timer);
    }
    else
      gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ,
          stdout);
  }
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_PUMPENCSEQ);
  gt_encseq_loader_drop_description_support(el);
  gt_encseq_loader_disable_autosupport(el);
  gt_encseq_loader_mirror(el);
  reads = gt_encseq_loader_load(el, readset, err);
  gt_assert(reads != NULL);
  gt_readjoiner_assembly_pump_encseq_through_cache(reads);
  if (gt_showtime_enabled())
    gt_timer_show_progress(*timer, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS,
        stdout);
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_OUTPUTCONTIGS);
  had_err = gt_contigpaths_to_fasta(readset, GT_READJOINER_SUFFIX_CONTIG_PATHS,
      GT_READJOINER_SUFFIX_CONTIGS, reads, lengthcutoff, showpaths,
      astat, coverage, load_copynum, (size_t)buffersize, default_logger, err);
  gt_encseq_delete(reads);
  gt_encseq_loader_delete(el);
  return had_err;
}
static void gt_readjoiner_assembly_load_graph(GtStrgraph **strgraph,
    GtEncseq *reads, const char *readset, GtUword rlen,
    GtLogger *default_logger, GtTimer *timer)
{
  *strgraph = gt_strgraph_new_from_file(reads, rlen, readset,
      GT_READJOINER_SUFFIX_SG);

  if (gt_showtime_enabled())
    gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_LOADSG, stdout);
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_LOADSG);

  gt_readjoiner_assembly_show_current_space("(graph loaded)");
}
Exemple #9
0
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
  int had_err = 0;
  GtStr *name1;
  gt_error_check(err);
  if (timer != NULL)
    gt_timer_show_progress(timer, "write encoding", stdout);
  if (hcr_enc->encdesc_encoder != NULL) {
    GtCstrIterator *cstr_iterator = gt_fasta_header_iterator_new(hcr_enc->files,
                                                                 err);
    had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder,
                                        cstr_iterator, name, err);
    gt_cstr_iterator_delete(cstr_iterator);
  }

  if (!had_err)
    had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);

  if (!had_err && gt_log_enabled()) {
    name1 = gt_str_new_cstr(name);
    gt_str_append_cstr(name1, HCRFILESUFFIX);
    gt_log_log("sequences with qualities encoding overview:");
    gt_log_log("**>");
    if (hcr_enc->page_sampling)
        gt_log_log("applied sampling technique: sampling every " GT_WU
                   "th page",
                   hcr_enc->sampling_rate);
    else if (hcr_enc->regular_sampling)
        gt_log_log("applied sampling technique: sampling every " GT_WU
                   "th read",
                   hcr_enc->sampling_rate);
    else
        gt_log_log("applied sampling technique: none");

    gt_log_log("total number of encoded nucleotide sequences with qualities: "
               GT_WU, hcr_enc->num_of_reads);
    gt_log_log("total number of encoded nucleotides: " GT_LLU,
               hcr_enc->seq_encoder->total_num_of_symbols);
    gt_log_log("bits per nucleotide encoding: %f",
               (gt_file_estimate_size(gt_str_get(name1)) * 8.0) /
                 hcr_enc->seq_encoder->total_num_of_symbols);
    gt_log_log("<**");
    gt_str_delete(name1);
  }
  return had_err;
}
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err) {
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  unsigned long rand,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                count;

  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));
  for (count = 0; count < amount; count++) {
    if (!had_err) {
      rand = gt_rand_max(max_rand);
      gt_log_log("get read: %lu", rand);
      had_err = gt_hcr_decoder_decode(hcrd, rand, seq, qual, desc, err);
      gt_log_log("%s",gt_str_get(desc));
      gt_log_log("%s",seq);
      gt_log_log("%s",qual);
    }
  }
  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  if (!gt_showtime_enabled())
    gt_timer_delete(timer);
  return had_err;
}
Exemple #11
0
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha,
                                 bool descs, GtQualRange qrange, GtTimer *timer,
                                 GtError *err)
{
    GtBaseQualDistr *bqd;
    GtHcrEncoder *hcr_enc;
    GtSeqIterator *seqit;
    GtStrArray *file;
    int had_err = 0,
        status;
    GtUword len1,
            len2,
            i,
            num_of_reads = 0;
    const GtUchar *seq,
          *qual;
    char *desc;

    gt_error_check(err);
    gt_assert(alpha && files);

    if (timer != NULL)
        gt_timer_show_progress(timer, "get <base,qual> distr", stdout);

    if (qrange.start != GT_UNDEF_UINT)
        if (qrange.start == qrange.end) {
            gt_error_set(err, "qrange.start must unequal qrange.end");
            return NULL;
        }

    hcr_enc = gt_malloc(sizeof (GtHcrEncoder));
    hcr_enc->files = files;
    hcr_enc->num_of_files = gt_str_array_size(files);
    hcr_enc->num_of_reads = 0;
    hcr_enc->page_sampling = false;
    hcr_enc->regular_sampling = false;
    hcr_enc->sampling_rate = 0;
    hcr_enc->pagesize = gt_pagesize();
    if (descs) {
        hcr_enc->encdesc_encoder = gt_encdesc_encoder_new();
        if (timer != NULL)
            gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer);
    }
    else
        hcr_enc->encdesc_encoder = NULL;

    hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder));
    hcr_enc->seq_encoder->alpha = alpha;
    hcr_enc->seq_encoder->sampling = NULL;
    hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files,
                                      sizeof (*(hcr_enc->seq_encoder->fileinfos)));
    hcr_enc->seq_encoder->qrange = qrange;
    bqd = hcr_base_qual_distr_new(alpha, qrange);

    /* check if reads in the same file are of same length and get
       <base, quality> pair distribution */
    for (i = 0; i < hcr_enc->num_of_files; i++) {
        file = gt_str_array_new();
        gt_str_array_add(file, gt_str_array_get_str(files, i));
        seqit = gt_seq_iterator_fastq_new(file, err);
        if (!seqit) {
            gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object");
            had_err = -1;
        }
        if (!had_err) {
            gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha));
            gt_seq_iterator_set_quality_buffer(seqit, &qual);
            status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err);

            if (status == 1) {
                num_of_reads = 1UL;
                while (!had_err) {
                    status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err);
                    if (status == -1)
                        had_err = -1;
                    if (status != 1)
                        break;
                    if (len2 != len1) {
                        gt_error_set(err, "reads have to be of equal length");
                        had_err = -1;
                        break;
                    }
                    if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0)
                        had_err = -1;
                    len1 = len2;
                    num_of_reads++;
                }
            }
            else if (status == -1)
                had_err = -1;

            if (!had_err) {
                if (i == 0)
                    hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads;
                else
                    hcr_enc->seq_encoder->fileinfos[i].readnum =
                        hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads;
                hcr_enc->seq_encoder->fileinfos[i].readlength = len1;
            }
        }
        hcr_enc->num_of_reads += num_of_reads;
        gt_str_array_delete(file);
        gt_seq_iterator_delete(seqit);
    }
    if (!had_err)
        hcr_base_qual_distr_trim(bqd);

    if (!had_err) {
        if (timer != NULL)
            gt_timer_show_progress(timer, "build huffman tree for sequences and"
                                   " qualities", stdout);
        hcr_enc->seq_encoder->huffman =
            gt_huffman_new(bqd,
                           hcr_base_qual_distr_func,
                           (GtUword) bqd->ncols * bqd->nrows);
    }
    if (!had_err) {
        hcr_enc->seq_encoder->qual_offset = bqd->qual_offset;
        hcr_base_qual_distr_delete(bqd);
        return hcr_enc;
    }
    return NULL;
}
Exemple #12
0
static int gt_genomediff_runner(int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
    bool mirrored = false;
    int had_err = 0,
        i;
    GtEncseq              *encseq = NULL;
    GtGenomediffArguments *arguments = tool_arguments;
    GtLogger              *logger;
    GtShuUnitFileInfo     *unit_info = NULL;
    GtTimer               *timer = NULL;

    gt_error_check(err);
    gt_assert(arguments);

    logger = gt_logger_new(arguments->verbose,
                           GT_LOGGER_DEFLT_PREFIX,
                           stdout);
    gt_assert(logger);

    for (i = parsed_args; i < argc; i++) {
        gt_str_array_add_cstr(arguments->filenames, argv[i]);
    }

    if (gt_showtime_enabled()) {
        timer = gt_timer_new_with_progress_description("start");
        gt_timer_start(timer);
        gt_assert(timer);
    }

    if (arguments->with_units) {
        gt_logger_log(logger, "unitfile option set, filename is %s\n",
                      gt_str_get(arguments->unitfile));
    }

    if (timer != NULL)
        gt_timer_show_progress(timer, "start shu search", stdout);

    if (gt_str_array_size(arguments->filenames) > 1UL) {
        GtEncseqEncoder *ee = gt_encseq_encoder_new();
        gt_encseq_encoder_set_timer(ee, timer);
        gt_encseq_encoder_set_logger(ee, logger);
        /* kr only makes sense for dna, so we can check this already with ee */
        gt_encseq_encoder_set_input_dna(ee);
        had_err = gt_encseq_encoder_encode(ee, arguments->filenames,
                                           gt_str_get(arguments->indexname), err);
        gt_encseq_encoder_delete(ee);
    }
    else {
        gt_str_append_str(arguments->indexname,
                          gt_str_array_get_str(arguments->filenames, 0));
        if (arguments->with_esa || arguments->with_pck) {
            GtStr *current_line = gt_str_new();
            FILE *prj_fp;
            const char *buffer;
            char **elements = NULL;

            prj_fp = gt_fa_fopen_with_suffix(gt_str_get(arguments->indexname),
                                             GT_PROJECTFILESUFFIX,"rb",err);
            if (prj_fp == NULL)
                had_err = -1;
            while (!had_err && gt_str_read_next_line(current_line, prj_fp) != EOF) {
                buffer = gt_str_get(current_line);
                if (elements != NULL) {
                    gt_free(elements[0]);
                    gt_free(elements[1]);
                }
                gt_free(elements);
                elements = gt_cstr_split(buffer, '=');
                gt_log_log("%s", elements[0]);
                if (strcmp("mirrored", elements[0]) == 0) {
                    gt_log_log("%s", elements[1]);
                    if (strcmp("1", elements[1]) == 0) {
                        mirrored = true;
                        gt_log_log("sequences are treated as mirrored");
                    }
                }
                gt_str_reset(current_line);
            }
            gt_str_delete(current_line);
            if (elements != NULL) {
                gt_free(elements[0]);
                gt_free(elements[1]);
            }
            gt_free(elements);
            gt_fa_xfclose(prj_fp);
        }
    }

    if (!had_err) {
        GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                             err);
        if (mirrored)
            gt_encseq_loader_mirror(el);
        encseq =
            gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
        gt_encseq_loader_delete(el);
    }
    if (encseq == NULL)
        had_err = -1;
    if (!had_err) {
        unit_info = gt_shu_unit_info_new(encseq);
        if (arguments->with_units)
            had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                                 logger, err);
    }

    if (!had_err) {
        uint64_t **shusums = NULL;
        if (arguments->with_esa || arguments->with_pck) {
            shusums = gt_genomediff_shulen_sum(arguments, unit_info,
                                               logger, timer, err);
            if (shusums == NULL)
                had_err = -1;
        }
        else {
            const bool doesa = true;
            GenomediffInfo gd_info;
            Suffixeratoroptions sopts;
            sopts.beverbose = arguments->verbose;
            sopts.indexname = arguments->indexname;
            sopts.db = NULL;
            sopts.encopts = NULL;
            sopts.genomediff = true;
            sopts.inputindex = arguments->indexname;
            sopts.loadopts = arguments->loadopts;
            sopts.showprogress = false;
            sopts.idxopts = arguments->idxopts;

            gt_assert(unit_info != NULL);
            gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                                unit_info->num_of_genomes);
            gd_info.shulensums = shusums;
            gd_info.unit_info = unit_info;
            had_err = gt_runsuffixerator(doesa, &sopts, &gd_info, logger, err);
        }
        if (!had_err && shusums != NULL) {
            had_err = gt_genomediff_kr_calc(shusums, arguments, unit_info,
                                            arguments->with_pck, logger, timer, err);
            gt_array2dim_delete(shusums);
        }
    }

    if (timer != NULL) {
        gt_timer_show_progress_final(timer, stdout);
        gt_timer_delete(timer);
    }
    gt_logger_delete(logger);
    gt_encseq_delete(encseq);
    gt_shu_unit_info_delete(unit_info);

    return had_err;
}
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments, GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;
  unsigned long start,
                end;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha)
      had_err = -1;
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha)
      had_err = -1;
  }

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "decoding", stdout);

    if (gt_str_length(arguments->name) == 0) {
      char *basenameptr;
      basenameptr = gt_basename(gt_str_get(arguments->file));
      gt_str_set(arguments->name, basenameptr);
      gt_free(basenameptr);
    }
    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL)
      had_err = -1;
    else {
      if (arguments->bench != 0) {
        had_err = gt_compreads_decompress_benchmark(hcrd,
                                                    arguments->bench,
                                                    timer, err);
      }
      else {
        if (arguments->rng.start != GT_UNDEF_ULONG
            && arguments->rng.end != GT_UNDEF_ULONG) {
          if (arguments->rng.start >= gt_hcr_decoder_num_of_reads(hcrd)
                || arguments->rng.end >= gt_hcr_decoder_num_of_reads(hcrd)) {
            gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                              "the total number of reads (%lu)",
                              arguments->rng.start,
                              arguments->rng.end,
                              gt_hcr_decoder_num_of_reads(hcrd));
            had_err = -1;
          }
          start = arguments->rng.start;
          end = arguments->rng.end;
        }
        else {
          start = 0;
          end = gt_hcr_decoder_num_of_reads(hcrd) - 1;
        }
        if (!had_err) {
          gt_log_log("filebasename: %s", gt_str_get(arguments->name));
          if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                          start, end, timer, err)
            != 0)
            had_err = -1;
        }
      }
    }
    gt_hcr_decoder_delete(hcrd);
  }
  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));
  return had_err;
}
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  GtEncseq       *es;
  GtUword        es_length,
                 nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    gt_logger_set_target(logger, fp);
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
    nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize, arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    GtKmerStartpos pos;
    GtArrayGtUword *pos_hash;
    GtUword rand_access = (GtUword) 50000000,
            rand_code,
            i,
            sum = 0;
    gt_timer_show_progress(timer, "random access", stdout);
    for (i = 0; i < rand_access; i++) {
      rand_code = gt_rand_max(nu_kmer_codes - 1);
      if (arguments->use_hash) {
        pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
        if (pos_hash != NULL)
          sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
      }
      else {
        pos = gt_kmer_database_get_startpos(db, rand_code);
        if (pos.no_positions > 0)
          sum += pos.startpos[pos.no_positions - 1];
      }
    }
    printf("sum: " GT_WU "\n", sum);

    gt_timer_show_progress(timer, "", stdout);
    gt_timer_stop(timer);
    gt_timer_delete(timer);
  }
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
static int gt_gdiffcalc_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtGenomediffArguments *arguments = tool_arguments;
  int had_err = 0, i;
  GtUword lcounter = 0, zcounter = 0;
  double **shusums = NULL;
  GtEncseq              *encseq = NULL;
  GtLogger              *logger;
  GtShuUnitFileInfo     *unit_info = NULL;
  GtTimer               *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose,
                         GT_LOGGER_DEFLT_PREFIX,
                         stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("load encseq");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    encseq =
      gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (timer != NULL)
    gt_timer_show_progress(timer, "load units", stdout);

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "read table", stdout);

  if (!had_err) {
    GtIO *table_file = NULL;
    GtTokenizer *tokenizer = NULL;
    GtStr *line = NULL;

    gt_assert(unit_info != NULL);
    gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                        unit_info->num_of_genomes);

    table_file = gt_io_new(gt_str_array_get(arguments->filenames, 0), "r");
    tokenizer = gt_tokenizer_new(table_file);
    line = gt_tokenizer_get_token(tokenizer);
    while (line != NULL && !had_err) {
      char *cline = gt_str_get(line);
      char *elem = strtok(cline, ";");
      zcounter = 0;
      while (elem != NULL && !had_err) {
        if (*elem != '#') {
          if (1 != sscanf(elem, "%lf",
                          &shusums[lcounter][zcounter])) {
            had_err = 1;
            gt_error_set(err, "couldn't scan");
            break;
          }
          gt_logger_log(logger,"wert: %lf", shusums[lcounter][zcounter]);
          zcounter++;
        }
        else {
          gt_logger_log(logger, "name: %s", elem++);
        }
        elem = strtok(NULL, ";");
      }
      gt_tokenizer_next_token(tokenizer);
      gt_str_delete(line);
      line = gt_tokenizer_get_token(tokenizer);
      lcounter++;
      gt_logger_log(logger, "line "GT_WD"", lcounter);
    }
  }
  if (!had_err) {
    GtUword num_of_seq, file_idx, seq_idx, startpos;
    GT_UNUSED GtUword oldpos = 0;

    gt_assert(unit_info != NULL);
    gt_assert(lcounter == zcounter);
    gt_assert(lcounter == unit_info->num_of_genomes);

    num_of_seq = gt_encseq_num_of_sequences(unit_info->encseq);

    for (seq_idx = 0; seq_idx < num_of_seq; seq_idx++) {
      startpos = gt_encseq_seqstartpos(unit_info->encseq, seq_idx);
      file_idx = gt_encseq_filenum(unit_info->encseq, startpos);
      gt_log_log("seq: "GT_WU" starts at: "GT_WU"\n"
                 "belonges to file: "GT_WU" which is part of genome: %s",
                 seq_idx, startpos, file_idx,
                 gt_str_array_get(unit_info->genome_names,
                                  unit_info->map_files[file_idx]));
      gt_assert(oldpos <= startpos);
      oldpos = startpos;
    }
  }
  if (!had_err && shusums != NULL) {
    had_err = gt_genomediff_calculate_div_from_avg(shusums, arguments,
                                                   unit_info,
                                                   logger, timer, err);
    gt_array2dim_delete(shusums);
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);
  return had_err;
}
static int gt_readjoiner_assembly_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GtError *err)
{
  GtReadjoinerAssemblyArguments *arguments = tool_arguments;
  GtLogger *verbose_logger, *default_logger;
  GtEncseqLoader *el;
  GtEncseq *reads;
  GtTimer *timer = NULL;
  GtStrgraph *strgraph = NULL;
  GtBitsequence *contained = NULL;
  const char *readset = gt_str_get(arguments->readset);
  bool eqlen = true;
  GtUword nreads, tlen, rlen;
  int had_err = 0;

  gt_assert(arguments);
  gt_error_check(err);
  default_logger = gt_logger_new(!arguments->quiet, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(default_logger,
      "gt readjoiner assembly (version "GT_READJOINER_VERSION")");
  verbose_logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX,
      stdout);
  gt_logger_log(verbose_logger, "verbose output activated");
  gt_logger_log(verbose_logger, "readset name = %s", readset);
  if (gt_showtime_enabled())
  {
    timer = gt_timer_new_with_progress_description(
        GT_READJOINER_ASSEMBLY_MSG_COUNTSPM);
    gt_timer_start(timer);
    gt_timer_show_cpu_time_by_progress(timer);
  }

  if (!arguments->paths2seq)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    reads = gt_encseq_loader_load(el, readset, err);
    if (reads == NULL)
    {
      had_err = -1;
    }
    if (had_err == 0)
    {
      eqlen = gt_encseq_accesstype_get(reads) == GT_ACCESS_TYPE_EQUALLENGTH;
      nreads = gt_encseq_num_of_sequences(reads);
      gt_logger_log(default_logger, "number of reads in filtered readset = "
                    GT_WU, nreads);
      tlen = gt_encseq_total_length(reads) - nreads + 1;
      gt_logger_log(verbose_logger, "total length of filtered readset = " GT_WU,
          tlen);

      if (eqlen)
      {
        rlen = gt_encseq_seqlength(reads, 0);
        gt_logger_log(verbose_logger, "read length = " GT_WU, rlen);
        gt_encseq_delete(reads);
        reads = NULL;
      }
      else
      {
        had_err = gt_readjoiner_assembly_build_contained_reads_list(
          arguments, &contained, err);
        rlen = 0;
        gt_logger_log(verbose_logger, "read length = variable");
        gt_assert(reads != NULL);
      }
    }

    if (had_err == 0)
    {
      if (!arguments->load)
      {
        had_err = gt_readjoiner_assembly_build_graph(arguments, &strgraph,
            reads, readset, eqlen, rlen, nreads, contained, default_logger,
            verbose_logger, timer, err);
      }
      else
      {
        gt_readjoiner_assembly_load_graph(&strgraph, reads, readset, rlen,
            default_logger, timer);
      }
    }

    if (!eqlen && reads != NULL && !arguments->errors)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0 && arguments->redtrans)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_REDTRANS,
            stdout);
      gt_strgraph_sort_edges_by_len(strgraph, false);
      (void)gt_strgraph_redtrans(strgraph, false);
      (void)gt_strgraph_redself(strgraph, false);
      (void)gt_strgraph_redwithrc(strgraph, false);
      gt_strgraph_log_stats(strgraph, verbose_logger);
    }

    if (had_err == 0 && arguments->errors)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_CLEANSG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_CLEANSG);
      had_err = gt_readjoiner_assembly_error_correction(strgraph,
          arguments->bubble, arguments->deadend, arguments->deadend_depth,
          verbose_logger);
    }

    if (had_err == 0 && arguments->save)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_SAVESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_SAVESG);
      gt_strgraph_show(strgraph, GT_STRGRAPH_BIN,
          gt_str_get(arguments->readset), GT_READJOINER_SUFFIX_SG, false);
    }

    if (!eqlen && reads != NULL)
    {
      gt_encseq_delete(reads);
      reads = NULL;
      if (had_err == 0)
        gt_strgraph_set_encseq(strgraph, NULL);
    }

    if (had_err == 0)
    {
      if (gt_showtime_enabled())
        gt_timer_show_progress(timer, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG,
            stdout);
      gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_TRAVERSESG);
      gt_readjoiner_assembly_show_current_space("(before traversal)");
      gt_strgraph_spell(strgraph, (GtUword)arguments->depthcutoff,
          (GtUword)arguments->lengthcutoff, arguments->vd, readset,
          GT_READJOINER_SUFFIX_CONTIG_PATHS, NULL, true,
          arguments->show_contigs_info, false, verbose_logger);
    }

    if (contained != NULL)
      gt_free(contained);
    gt_strgraph_delete(strgraph);
    strgraph = NULL;
    gt_assert(reads == NULL);
    gt_encseq_loader_delete(el);
  }

  if (had_err == 0)
  {
    gt_readjoiner_assembly_show_current_space("(before paths2seq)");
    had_err = gt_readjoiner_assembly_paths2seq(readset,
        (GtUword)arguments->lengthcutoff, arguments->vd,
        arguments->astat, arguments->coverage, arguments->copynum,
        arguments->buffersize, default_logger, &timer, err);
  }

  if (gt_showtime_enabled())
  {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(default_logger);
  gt_logger_delete(verbose_logger);
  return had_err;
}
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath = gt_str_get(arguments->querypath);
  GtStr* coarse_fname = gt_str_new_cstr("coarse_");
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch              *match;
    GtMatchIterator      *mp = NULL;
    GtNREncseq           *nrencseq = NULL;
    GtStr                *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition          *hits;
    double                eval,
                          raw_eval = 0.0;
    GtUword               coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int                   curr_hits = 0,
                          max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);

    gt_str_append_cstr(fastaname, ".fas");

    for (i=0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S'
         */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(gt_str_get(fastaname),
                                                          err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(gt_str_get(fastaname),
                                                          err);
    }

    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END)
      {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            gt_match_delete(match);
            curr_hits++;
            if (curr_hits == max_hits) {
              HitPosition *hit_extention;
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hit_extention) * max_hits);
              for (i=max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          } else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
        } else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }
    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;
      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);
      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      /* TODO DW do NOT extract complete uniques! these could be complete
         chromosomes!! just extract something around it? maybe +- max query
         length*/
      for (i = 0; i < curr_hits; i++) {
        gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                             hits[i].idx);
      }
      had_err =
        gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                           decomp,
                                                           &coarse_db_len,
                                                           err);
      gt_assert(coarse_db_len != 0);
      gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err =
          gt_condenser_search_create_nucl_blastdb(gt_str_get(coarse_fname),
                                                  err);
      else
        had_err =
          gt_condenser_search_create_prot_blastdb(gt_str_get(coarse_fname),
                                                  err);
    }
    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            char *dbseqid = gt_malloc(sizeof (*dbseqid) * 50);
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
            gt_free(dbseqid);
          } else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);

    }
    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i=0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}