Пример #1
0
char *gt_condenseq_basefilename(const GtCondenseq *condenseq)
{
  char *basename = NULL,
       *suffix_ptr;
  if (condenseq->filename != NULL) {
    basename = gt_basename(condenseq->filename);
    if (strlen(basename) > (size_t) 1 &&
        (suffix_ptr = strrchr(basename + 1, '.')) != NULL) {
      /* remove suffix */
      *suffix_ptr = '\0';
    }
  }
  return basename;
}
Пример #2
0
static void default_track_selector(GtBlock *block, GtStr *result,
                                   GT_UNUSED void *data)
{
  GtGenomeNode *top;
  char *basename;
  gt_assert(block && result);
  gt_str_reset(result);
  top = (GtGenomeNode*) gt_block_get_top_level_feature(block);
  /* we take the basename of the filename to have nicer output in the
     generated graphic. this might lead to ``collapsed'' tracks, if two files
     with different paths have the same basename. */
  basename = gt_basename(gt_genome_node_get_filename(top));
  gt_str_append_cstr(result, basename);
  gt_free(basename);
  gt_str_append_char(result, GT_FILENAME_TYPE_SEPARATOR);
  gt_str_append_cstr(result, gt_block_get_type(block));
}
Пример #3
0
static int gt_encseq_encode_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, GT_UNUSED void *tool_arguments,
                               GtError *err)
{
  int had_err = 0,
      i;
  GtEncseqEncodeArguments *arguments =
                                      (GtEncseqEncodeArguments*) tool_arguments;
  GtStrArray *infiles;
  gt_error_check(err);

  infiles = gt_str_array_new();
  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(infiles, argv[i]);
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    if (gt_str_array_size(infiles) > 1UL) {
      gt_error_set(err,"if more than one input file is given, then "
                       "option -indexname is mandatory");
      had_err = -1;
    } else {
      char *basenameptr;
      basenameptr = gt_basename(gt_str_array_get(infiles, 0UL));
      gt_str_set(arguments->indexname, basenameptr);
      gt_free(basenameptr);
    }
  }

  if (!had_err) {
    gt_assert(gt_str_length(arguments->indexname) > 0UL);
    had_err = encode_sequence_files(infiles,
                                    arguments->eopts,
                                    gt_str_get(arguments->indexname),
                                    arguments->verbose,
                                    arguments->no_esq_header,
                                    err);
  }

  if (!had_err && arguments->showstats)
    show_encoded_statistics(infiles, gt_str_get(arguments->indexname));

  gt_str_array_delete(infiles);
  return had_err;
}
static int gt_compreads_compress_arguments_check(GT_UNUSED int rest_argc,
                                       void *tool_arguments,
                                       GtError *err)
{
  int had_err = 0;
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  GtSplitter *splitter = NULL;
  GtStr *buffer;
  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_array_size(arguments->files) == 0) {
    gt_error_set(err, "option \"-files\" is mandatory and requires"
                      " at least one filename as argument!");
    had_err = -1;
  }

  if (!had_err) {
    if (gt_str_length(arguments->name) == 0) {
      if (gt_str_array_size(arguments->files) > 1UL) {
        gt_error_set(err, "option \"-name\" needs to be specified"
                          " if more than one file is given");
        had_err = -1;
      }
      else {
        GtUword i;
        char *basename;
        splitter = gt_splitter_new();
        basename = gt_basename(gt_str_array_get(arguments->files, 0));
        buffer = gt_str_new_cstr(basename);
        gt_splitter_split(splitter, gt_str_get(buffer), gt_str_length(buffer),
                          '.');
        for (i = 0; i < gt_splitter_size(splitter) - 1; i++) {
          gt_str_append_cstr(arguments->name,
                             gt_splitter_get_token(splitter, i));
          if (i < gt_splitter_size(splitter) - 2)
            gt_str_append_char(arguments->name, '.');
        }
        gt_free(basename);
        gt_splitter_delete(splitter);
        gt_str_delete(buffer);
      }
    }
  }

  if (!had_err) {
    char *sampling_type = gt_str_get(arguments->method);
    static const char *methods[] = { "page", "regular", "none" };

    if (!strcmp(methods[0], sampling_type)) {
      arguments->pagewise = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_PAGE_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "page sampling was chosen, but sampling"
                          " rate was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[1], sampling_type)) {
      arguments->regular = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_REGULAR_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "regular sampling was chosen, but sampling rate "
                          " was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[2], sampling_type)) {
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = 0;
      else if (arguments->srate != 0) {
        gt_error_set(err, "no sampling was chosen, but sampling rate was"
                          " set to "GT_WU"! this seems wrong.",
                          arguments->srate);
        had_err = -1;
      }
    }
    else {
      gt_error_set(err, "somethings wrong with the stype option");
      had_err = -1;
    }
  }

  if (!had_err) {
    if (arguments->arg_range.start != GT_UNDEF_UWORD) {
      if (arguments->arg_range.start <= (GtUword) UINT_MAX) {
        gt_safe_assign(arguments->qrng.start, arguments->arg_range.start);
        if (arguments->arg_range.end <= (GtUword) UINT_MAX)
          gt_safe_assign(arguments->qrng.end, arguments->arg_range.end);
        else
          had_err = -1;
      }
      else
        had_err = -1;
    }
    if (had_err)
      gt_error_set(err, "Range for qualities: value to large! larger than %u",
                   UINT_MAX);
  }
  return had_err;
}
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments, GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;
  unsigned long start,
                end;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    gt_timer_start(timer);
    gt_assert(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha)
      had_err = -1;
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha)
      had_err = -1;
  }

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "decoding", stdout);

    if (gt_str_length(arguments->name) == 0) {
      char *basenameptr;
      basenameptr = gt_basename(gt_str_get(arguments->file));
      gt_str_set(arguments->name, basenameptr);
      gt_free(basenameptr);
    }
    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL)
      had_err = -1;
    else {
      if (arguments->bench != 0) {
        had_err = gt_compreads_decompress_benchmark(hcrd,
                                                    arguments->bench,
                                                    timer, err);
      }
      else {
        if (arguments->rng.start != GT_UNDEF_ULONG
            && arguments->rng.end != GT_UNDEF_ULONG) {
          if (arguments->rng.start >= gt_hcr_decoder_num_of_reads(hcrd)
                || arguments->rng.end >= gt_hcr_decoder_num_of_reads(hcrd)) {
            gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                              "the total number of reads (%lu)",
                              arguments->rng.start,
                              arguments->rng.end,
                              gt_hcr_decoder_num_of_reads(hcrd));
            had_err = -1;
          }
          start = arguments->rng.start;
          end = arguments->rng.end;
        }
        else {
          start = 0;
          end = gt_hcr_decoder_num_of_reads(hcrd) - 1;
        }
        if (!had_err) {
          gt_log_log("filebasename: %s", gt_str_get(arguments->name));
          if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                          start, end, timer, err)
            != 0)
            had_err = -1;
        }
      }
    }
    gt_hcr_decoder_delete(hcrd);
  }
  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));
  return had_err;
}
Пример #6
0
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 GtUword filenum,
                                 GtUword seqnum)
{
    GtUword offset_is_defined = false;
    GtRange range, descrange;
    GtGenomeNode *sr = NULL;
    gt_assert(sequence_regions && sequenceid && srf && input);
    if (gth_input_use_substring_spec(input)) {
        range.start = gth_input_genomic_substring_from(input);
        range.end   = gth_input_genomic_substring_to(input);
    }
    else {
        range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
    }
    if (srf->use_desc_ranges) {
        GtStr *description = gt_str_new();
        gth_input_get_genomic_description(input, description, filenum, seqnum);
        if (!gt_parse_description_range(gt_str_get(description), &descrange))
            offset_is_defined = true;
        gt_str_delete(description);
    }
    if (offset_is_defined)
        range = gt_range_offset(&range, descrange.start);
    else
        range = gt_range_offset(&range, 1); /* 1-based */
    if (!gt_str_length(sequenceid) ||
            (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
             !offset_is_defined)) {
        /* sequenceid is empty or exists already (and no offset has been parsed)
           -> make one up */
        GtStr *seqid;
        char *base;
        base = gt_basename(gth_input_get_genomic_filename(input, filenum));
        seqid = gt_str_new_cstr(base);
        gt_free(base);
        gt_str_append_char(seqid, '|');
        gt_str_append_uword(seqid, seqnum + 1); /* 1-based */
        seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD);
        gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)));
        gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid));
        sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum),
                                range.start, range.end);
        gt_hashmap_add(sequence_regions,
                       (void*) gt_cstr_table_get(srf->used_seqids,
                               gt_str_get(seqid)),
                       sr);
        gt_str_delete(seqid);
    }
    else {
        /* sequenceid does not exists already (or an offset has been parsed)
           -> use this one */
        if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
            /* no sequence region with this id exists -> create one */
            gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
            sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                    seqnum), range.start, range.end);
            gt_hashmap_add(sequence_regions,
                           (void*) gt_cstr_table_get(srf->used_seqids,
                                   gt_str_get(sequenceid)),
                           sr);
        }
        else {
            GtRange prev_range, new_range;
            /* sequence region with this id exists already -> modify range */
            sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
            gt_assert(sr);
            prev_range = gt_genome_node_get_range(sr);
            new_range = gt_range_join(&prev_range, &range);
            gt_genome_node_set_range(sr, &new_range);
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
        }
    }
    gt_assert(sr);
}
Пример #7
0
static int gt_condenseq_compress_runner(GT_UNUSED int argc, const char **argv,
                                        int parsed_args, void *tool_arguments,
                                        GtError *err)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtLogger *logger,
           *kdb_logger;
  FILE *kmer_fp = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);
  kdb_logger = gt_logger_new(arguments->kdb, GT_LOGGER_DEFLT_PREFIX, stderr);
  if (arguments->kdb) {
    kmer_fp = gt_fa_fopen("kmer_db.out", "w", err);
    gt_logger_set_target(kdb_logger, kmer_fp);
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    char *basenameptr;
    basenameptr = gt_basename(argv[parsed_args]);
    gt_str_set(arguments->indexname, basenameptr);
    gt_free(basenameptr);
  }

  if (!had_err) {
    GtEncseqLoader *es_l = gt_encseq_loader_new();
    arguments->input_es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->input_es == NULL)
      had_err = -1;
    gt_encseq_loader_delete(es_l);
  }

  if (!had_err) {
    if (arguments->minalignlength == GT_UNDEF_UWORD)
      arguments->minalignlength = arguments->initsize != GT_UNDEF_UWORD ?
                                  arguments->initsize / (GtUword) 3UL :
                                  GT_UNDEF_UWORD;
    if (arguments->windowsize == GT_UNDEF_UINT)
      arguments->windowsize = arguments->minalignlength != GT_UNDEF_UWORD ?
                              (unsigned int) (arguments->minalignlength / 5U) :
                              GT_UNDEF_UINT;
    if (arguments->windowsize < 4U)
      arguments->windowsize = 4U;
    if (arguments->kmersize == GT_UNDEF_UINT) {
      unsigned int size =
        gt_alphabet_num_of_chars(gt_encseq_alphabet(arguments->input_es));
      /* size^k ~= 100000 */
      gt_safe_assign(arguments->kmersize,
                     gt_round_to_long(gt_log_base(100000.0, (double) size)));
      gt_logger_log(logger, "|A|: %u, k: %u",
                    size, arguments->kmersize);
    }

    if (arguments->windowsize == GT_UNDEF_UINT) {
      arguments->windowsize = 5U * arguments->kmersize;
    }
    if (arguments->minalignlength == GT_UNDEF_UWORD) {
      arguments->minalignlength = (GtUword) (3UL * arguments->windowsize);
    }
    if (arguments->initsize == GT_UNDEF_UWORD) {
      arguments->initsize = (GtUword) (3UL * arguments->minalignlength);
    }
  }
  if (!had_err &&
      arguments->windowsize <= arguments->kmersize) {
    gt_error_set(err, "-windowsize (%u) must be larger -kmersize (%u)!",
                 arguments->windowsize, arguments->kmersize);
    had_err = -1;
  }
  if (!had_err &&
      arguments->minalignlength < (GtUword) arguments->windowsize) {
    gt_error_set(err, "-alignlength (" GT_WU ") must be at least "
                 "-windowsize (%u)!", arguments->minalignlength,
                 arguments->windowsize);
    had_err = -1;
  }
  if (!had_err && (arguments->initsize < arguments->minalignlength)) {
    gt_error_set(err, "-initsize (" GT_WU ") must be at least "
                 "-alignlength (" GT_WU ")!", arguments->initsize,
                 arguments->minalignlength);
    had_err = -1;
  }

  if (!had_err) {
    GtCondenseqCreator *ces_c;

    if (!had_err) {
      ces_c = gt_condenseq_creator_new(arguments->initsize,
                                       arguments->minalignlength,
                                       arguments->xdrop,
                                       &(arguments->scores),
                                       arguments->kmersize,
                                       arguments->windowsize,
                                       logger,
                                       err);
      if (ces_c == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->cutoff_value == GT_UNDEF_UWORD)
        gt_condenseq_creator_use_mean_cutoff(ces_c);
      else if (arguments->cutoff_value == 0)
        gt_condenseq_creator_disable_cutoff(ces_c);
      else
        gt_condenseq_creator_set_cutoff(ces_c, arguments->cutoff_value);
      gt_condenseq_creator_set_mean_fraction(ces_c, arguments->fraction);
      if (arguments->prune)
        gt_condenseq_creator_disable_prune(ces_c);
      if (arguments->brute)
        gt_condenseq_creator_enable_brute_force(ces_c);
      if (!arguments->diags)
        gt_condenseq_creator_disable_diagonals(ces_c);
      if (arguments->full_diags)
        gt_condenseq_creator_enable_full_diagonals(ces_c);
      if (arguments->clean_percent != GT_UNDEF_UINT)
        gt_condenseq_creator_set_diags_clean_limit(ces_c,
                                                   arguments->clean_percent);

      had_err = gt_condenseq_creator_create(ces_c,
                                            arguments->indexname,
                                            arguments->input_es,
                                            logger, kdb_logger, err);

      gt_condenseq_creator_delete(ces_c);
    }
  }

  gt_logger_delete(logger);
  gt_logger_delete(kdb_logger);
  if (arguments->kdb)
    gt_fa_fclose(kmer_fp);
  return had_err;
}
Пример #8
0
int gt_gtdata_show_help(const char *progname, GT_UNUSED void *unused,
                        GtError *err)
{
  GtSplitter *splitter;
  GtStr *doc_file;
  lua_State *L = NULL;
  char *prog, *bn;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(progname);

  prog = gt_cstr_dup(progname); /* create modifiable copy for splitter */
  splitter = gt_splitter_new();
  gt_splitter_split(splitter, prog, strlen(prog), ' ');
  doc_file = gt_get_gtdata_path(gt_splitter_get_token(splitter, 0), err);
  if (!doc_file)
    had_err = -1;

  if (!had_err) {
    gt_str_append_cstr(doc_file, "/doc/");
    /* create Lua & push gtdata_doc_dir to Lua */
    L = luaL_newstate();
    if (!L) {
      gt_error_set(err, "out of memory (cannot create new Lua state)");
      had_err = -1;
    }
  }

  if (!had_err) {
    luaL_openlibs(L);
    lua_pushstring(L, gt_str_get(doc_file));
    lua_setglobal(L, "gtdata_doc_dir");
    /* finish creating doc_file */
    if (gt_splitter_size(splitter) == 1) {
      /* special case for `gt` */
      bn = gt_basename(progname);
      gt_str_append_cstr(doc_file, bn);
      gt_free(bn);
    }
    else {
      /* general case for the tools */
      gt_str_append_cstr(doc_file,
                      gt_splitter_get_token(splitter,
                                         gt_splitter_size(splitter) - 1));
    }
    gt_str_append_cstr(doc_file, ".lua");
    /* execute doc_file */
    if (luaL_loadfile(L, gt_str_get(doc_file)) || lua_pcall(L, 0, 0, 0)) {
      gt_error_set(err, "cannot run doc file: %s", lua_tostring(L, -1));
      had_err = -1;
    }
  }

  /* free */
  if (L) lua_close(L);
  gt_str_delete(doc_file);
  gt_splitter_delete(splitter);
  gt_free(prog);

  return had_err;
}
Пример #9
0
static GtOPrval parsemkfmindex(Mkfmcallinfo *mkfmcallinfo,
                               int argc,
                               const char **argv,
                               GtError *err)
{
  GtOptionParser *op;
  GtOption *option, *optionfmout;
  GtOPrval oprval;
  int parsed_args;

  gt_error_check(err);
  mkfmcallinfo->indexnametab = gt_str_array_new();
  mkfmcallinfo->outfmindex = gt_str_new();
  mkfmcallinfo->leveldesc = gt_str_new();
  op = gt_option_parser_new("[option ...] -ii indexfile [...]",
                         "Compute FM-index.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");
  optionfmout = gt_option_new_string("fmout",
                             "specify name of FM-index to be generated\n"
                             "(mandatory if more than one input index "
                             "is specified)",
                             mkfmcallinfo->outfmindex, NULL);
  gt_option_parser_add_option(op, optionfmout);

  option = gt_option_new_filename_array("ii", "specify indices to be used",
                                        mkfmcallinfo->indexnametab);
  gt_option_is_mandatory(option);
  gt_option_parser_add_option(op, option);

  option = gt_option_new_string("size",
                             "specify size (tiny, small, medium, big)",
                             mkfmcallinfo->leveldesc, "medium");
  gt_option_parser_add_option(op, option);

  option = gt_option_new_bool("noindexpos",
                           "store no index positions (hence the positions of\n"
                           "matches in the index cannot be retrieved)",
                           &mkfmcallinfo->noindexpos,false);
  gt_option_parser_add_option(op, option);

  oprval = gt_option_parser_parse(op, &parsed_args, argc, argv, gt_versionfunc,
                                  err);
  if (oprval == GT_OPTION_PARSER_OK)
  {
    if (!gt_option_is_set(optionfmout))
    {
      if (gt_str_array_size(mkfmcallinfo->indexnametab) > 1UL)
      {
        gt_error_set(err,"if more than one index is given, then "
                          "option -fmout is mandatory");
        oprval = GT_OPTION_PARSER_ERROR;
      } else
      {
        char *basenameptr;

        basenameptr = gt_basename(gt_str_array_get(mkfmcallinfo->indexnametab,
                                  0));
        gt_str_set(mkfmcallinfo->outfmindex,basenameptr);
        gt_free(basenameptr);
      }
    }
  }
  gt_option_parser_delete(op);
  if (oprval == GT_OPTION_PARSER_OK && parsed_args != argc)
  {
    gt_error_set(err,"superfluous program parameters");
    oprval = GT_OPTION_PARSER_ERROR;
  }
  return oprval;
}
Пример #10
0
static GtOPrval parse_options(int *parsed_args,
                              bool doesa,
                              Suffixeratoroptions *so,
                              int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *option,
           *optionshowprogress,
           *optiongenomediff,
           *optionii;
  GtOPrval oprval;
  gt_error_check(err);

  op = gt_option_parser_new("[option ...] (-db file [...] | -ii index)",
                            doesa ? "Compute enhanced suffix array."
                                  : "Compute packed index.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  /* input info */
  so->indexname = gt_str_new();
  so->inputindex = gt_str_new();
  so->db = gt_str_array_new();

  /* register options for encoded sequence handling */
  so->encopts = gt_encseq_options_register_encoding(op, so->indexname, so->db);
  so->loadopts = gt_encseq_options_register_loading(op, so->indexname);

  /* register options for index handling */
  if (doesa)
    so->idxopts = gt_index_options_register_esa(op, so->encopts);
  else
    so->idxopts = gt_index_options_register_packedidx(op, so->indexname,
                                                      so->encopts);

  /* verbosity */
  option = gt_option_new_verbose(&so->beverbose);
  gt_option_parser_add_option(op, option);

  optionshowprogress = gt_option_new_bool("showprogress",
                                          "show a progress bar",
                                          &so->showprogress,
                                          false);
  gt_option_parser_add_option(op, optionshowprogress);

  optionii = gt_option_new_filename("ii", "specify existing encoded sequence",
                                    so->inputindex);
  gt_option_parser_add_option(op, optionii);
  gt_option_is_mandatory_either(gt_encseq_options_db_option(so->encopts),
                                optionii);
  gt_option_exclude(gt_encseq_options_db_option(so->encopts), optionii);
  gt_option_exclude(optionii, gt_encseq_options_smap_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_dna_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_protein_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_plain_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_sat_option(so->encopts));

  optiongenomediff = gt_option_new_bool("genomediff",
                                   "directly process the lcp intervals using "
                                   "the genomediff algorithm (suffix array and "
                                   "lcp-tables are not output)",
                                   &so->genomediff,
                                   false);
  gt_option_is_extended_option(optiongenomediff);
  if (gt_index_options_outsuftab_option(so->idxopts) != NULL) {
    gt_option_exclude(optiongenomediff,
                      gt_index_options_outsuftab_option(so->idxopts));
  }
  gt_option_parser_add_option(op, optiongenomediff);

  /* suffixerator and friends do not take arguments */
  gt_option_parser_set_min_max_args(op, 0U, 0U);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv, gt_versionfunc,
                                  err);

  if (gt_str_length(so->indexname) == 0UL) {
    /* we do not have an indexname yet, so there was none given in the
       -indexname option and it could not be derived from the input filenames.
       So it must be in the -ii parameter. */
    char *basenameptr;
    basenameptr = gt_basename(gt_str_get(so->inputindex));
    gt_str_set(so->indexname, basenameptr);
    gt_free(basenameptr);
  }

  gt_option_parser_delete(op);

  return oprval;
}
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath = gt_str_get(arguments->querypath);
  GtStr* coarse_fname = gt_str_new_cstr("coarse_");
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch              *match;
    GtMatchIterator      *mp = NULL;
    GtNREncseq           *nrencseq = NULL;
    GtStr                *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition          *hits;
    double                eval,
                          raw_eval = 0.0;
    GtUword               coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int                   curr_hits = 0,
                          max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);

    gt_str_append_cstr(fastaname, ".fas");

    for (i=0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S'
         */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
          gt_assert(avg.avg != 0);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(gt_str_get(fastaname),
                                                          err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(gt_str_get(fastaname),
                                                          err);
    }

    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END)
      {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            gt_match_delete(match);
            curr_hits++;
            if (curr_hits == max_hits) {
              HitPosition *hit_extention;
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hit_extention) * max_hits);
              for (i=max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          } else {
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            had_err = -1;
          }
        } else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }
    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;
      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);
      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      /* TODO DW do NOT extract complete uniques! these could be complete
         chromosomes!! just extract something around it? maybe +- max query
         length*/
      for (i = 0; i < curr_hits; i++) {
        gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                             hits[i].idx);
      }
      had_err =
        gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                           decomp,
                                                           &coarse_db_len,
                                                           err);
      gt_assert(coarse_db_len != 0);
      gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err =
          gt_condenser_search_create_nucl_blastdb(gt_str_get(coarse_fname),
                                                  err);
      else
        had_err =
          gt_condenser_search_create_prot_blastdb(gt_str_get(coarse_fname),
                                                  err);
    }
    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            char *dbseqid = gt_malloc(sizeof (*dbseqid) * 50);
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
            gt_free(dbseqid);
          } else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);

    }
    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i=0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}