Exemple #1
0
static GtOptionParser* gt_seed_extend_option_parser_new(void *tool_arguments)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option, *op_gre, *op_xdr, *op_cam, *op_his, *op_dif, *op_pmh,
    *op_len, *op_err, *op_xbe, *op_sup, *op_frq, *op_mem, *op_ali, *op_bia;
  gt_assert(arguments != NULL);

  /* init */
  op = gt_option_parser_new("[option ...] encseq_basename [encseq_basename]",
                            "Calculate local alignments using the seed and "
                            "extend algorithm.");

  /* DIAGBANDSEED OPTIONS */

  /* -ii */
  option = gt_option_new_string("ii",
                                "Input index for encseq encoded sequences",
                                arguments->dbs_indexname,
                                "");
  gt_option_is_mandatory(option);
  gt_option_parser_add_option(op, option);

  /* -qii */
  option = gt_option_new_string("qii",
                                "Query input index (encseq)",
                                arguments->dbs_queryname,
                                "");
  gt_option_parser_add_option(op, option);

  /* -seedlength */
  op_len = gt_option_new_uint_min_max("seedlength",
                                      "Minimum length of a seed",
                                      &arguments->dbs_seedlength,
                                      14UL, 1UL, 32UL);
  gt_option_parser_add_option(op, op_len);

  /* -diagbandwidth */
  option = gt_option_new_uword("diagbandwidth",
                               "Logarithm of diagonal band width (for filter)",
                               &arguments->dbs_logdiagbandwidth,
                               6UL);
  gt_option_parser_add_option(op, option);

  /* -mincoverage */
  option = gt_option_new_uword("mincoverage",
                               "Minimum coverage in two neighbouring diagonal "
                               "bands (for filter)",
                               &arguments->dbs_mincoverage,
                               35UL);
  gt_option_parser_add_option(op, option);

  /* -maxfreq */
  op_frq = gt_option_new_uword_min("maxfreq",
                                   "Maximum frequency of a k-mer (for filter)",
                                   &arguments->dbs_maxfreq,
                                   GT_UWORD_MAX, 1UL);
  gt_option_parser_add_option(op, op_frq);

  /* -t */
  op_sup = gt_option_new_uword_min("t",
                                   "Suppress k-mers occurring at least t times "
                                   "(for filter)",
                                   &arguments->dbs_suppress,
                                   GT_UWORD_MAX, 2UL);
  gt_option_exclude(op_sup, op_frq);
  gt_option_is_development_option(op_sup);
  gt_option_parser_add_option(op, op_sup);

  /* -memlimit */
  op_mem = gt_option_new_string("memlimit",
                                "Maximum memory usage to determine the maximum "
                                "frequency of a k-mer (for filter)",
                                arguments->dbs_memlimit_str,
                                "");
  gt_option_parser_add_option(op, op_mem);

  /* -debug-kmer */
  option = gt_option_new_bool("debug-kmer",
                              "Output KmerPos lists",
                              &arguments->dbs_debug_kmer,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -debug-seedpair */
  option = gt_option_new_bool("debug-seedpair",
                              "Output SeedPair lists",
                              &arguments->dbs_debug_seedpair,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -verify */
  option = gt_option_new_bool("verify",
                              "Check that k-mer seeds occur in the sequences",
                              &arguments->dbs_verify,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* SEED EXTENSION OPTIONS */

  /* -extendxdrop */
  op_xdr = gt_option_new_uword_min_max("extendxdrop",
                                       "Extend seed to both sides using xdrop "
                                       "algorithm, optional parameter "
                                       "specifies sensitivity",
                                       &arguments->se_extendxdrop,
                                       97UL, 90UL, 100UL);
  gt_option_argument_is_optional(op_xdr);
  gt_option_parser_add_option(op, op_xdr);
  arguments->se_option_xdrop = gt_option_ref(op_xdr);

  /* -xdropbelow */
  op_xbe = gt_option_new_word("xdropbelow",
                              "Specify xdrop cutoff score (0 means "
                              "automatically defined depending on minidentity)",
                              &arguments->se_xdropbelowscore,
                              0L);
  gt_option_imply(op_xbe, op_xdr);
  gt_option_parser_add_option(op, op_xbe);

  /* -extendgreedy */
  op_gre = gt_option_new_uword_min_max("extendgreedy",
                                       "Extend seed to both sides using greedy "
                                       "algorithm, optional parameter "
                                       "specifies sensitivity",
                                       &arguments->se_extendgreedy,
                                       97UL, 90UL, 100UL);
  gt_option_argument_is_optional(op_gre);
  gt_option_exclude(op_gre, op_xdr);
  gt_option_parser_add_option(op, op_gre);
  arguments->se_option_greedy = gt_option_ref(op_gre);

  /* -history */
  op_his = gt_option_new_uword_min_max("history",
                                       "Size of (mis)match history in range [1"
                                       "..64] (trimming for greedy extension)",
                                       &arguments->se_historysize,
                                       60UL, 1UL, 64UL);
  gt_option_imply(op_his, op_gre);
  gt_option_parser_add_option(op, op_his);

  /* -maxalilendiff */
  op_dif = gt_option_new_uword("maxalilendiff",
                               "Maximum difference of alignment length "
                               "(trimming for greedy extension)",
                               &arguments->se_maxalilendiff, 0UL);
  gt_option_imply(op_dif, op_gre);
  gt_option_is_development_option(op_dif);
  gt_option_parser_add_option(op, op_dif);

  /* -percmathistory */
  op_pmh = gt_option_new_uword_min_max("percmathistory",
                                       "percentage of matches required in "
                                       "history (for greedy extension)",
                                       &arguments->se_perc_match_hist,
                                       0UL, 1UL, 100UL);
  gt_option_imply(op_pmh, op_gre);
  gt_option_is_development_option(op_pmh);
  gt_option_parser_add_option(op, op_pmh);

  /* -bias-parameters */
  op_bia = gt_option_new_bool("bias-parameters",
                              "Use -maxalilendiff 30 and let percmathistory "
                              "depend on minidentiy and DNA base distribution",
                              &arguments->bias_parameters,
                              false);
  gt_option_imply(op_bia, op_gre);
  gt_option_exclude(op_bia, op_pmh);
  gt_option_exclude(op_bia, op_dif);
  gt_option_is_development_option(op_bia);
  gt_option_parser_add_option(op, op_bia);

  /* -cam */
  op_cam = gt_option_new_string("cam",
                                gt_cam_extendgreedy_comment(),
                                arguments->se_char_access_mode,
                                "");
  gt_option_is_development_option(op_cam);
  gt_option_parser_add_option(op, op_cam);

  /* -l */
  op_len = gt_option_new_uword_min("l",
                                   "Minimum alignment length "
                                   "(for seed extension)",
                                   &arguments->se_alignlength,
                                   20UL, 1UL);
  gt_option_imply_either_2(op_len, op_xdr, op_gre);
  gt_option_parser_add_option(op, op_len);

  /* -minidentity */
  op_err = gt_option_new_uword_min_max("minidentity",
                                       "Minimum identity of matches "
                                       "(for seed extension)",
                                       &arguments->se_minidentity,
                                       80UL, GT_EXTEND_MIN_IDENTITY_PERCENTAGE,
                                       99UL);
  gt_option_imply_either_2(op_err, op_xdr, op_gre);
  gt_option_parser_add_option(op, op_err);

  /* -a */
  op_ali = gt_option_new_uword_min("a",
                                   "show alignments/sequences (optional "
                                   "argument is number of columns per line)",
                                   &arguments->se_alignmentwidth,
                                   70, 20);
  gt_option_argument_is_optional(op_ali);
  gt_option_parser_add_option(op, op_ali);
  arguments->se_option_withali = gt_option_ref(op_ali);

  /* -mirror */
  option = gt_option_new_bool("mirror",
                              "Add reverse complement reads",
                              &arguments->mirror,
                              false);
  gt_option_parser_add_option(op, option);

  /* -overlappingseeds */
  option = gt_option_new_bool("overlappingseeds",
                              "Allow overlapping SeedPairs",
                              &arguments->overlappingseeds,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -benchmark */
  option = gt_option_new_bool("benchmark",
                              "Measure total running time and be silent",
                              &arguments->benchmark,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -seed-display */
  option = gt_option_new_bool("seed-display",
                              "Display seeds in #-line",
                              &arguments->seed_display,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -v */
  option = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(op, option);

  return op;
}
Exemple #2
0
static GtOptionParser* gt_gff3_option_parser_new(void *tool_arguments)
{
  GFF3Arguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *sort_option, *load_option, *strict_option, *tidy_option,
           *mergefeat_option, *addintrons_option, *offset_option,
           *offsetfile_option, *setsource_option, *option;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] [GFF3_file ...]", "Parse, possibly "
                            "transform, and output GFF3 files.");

  /* -sort */
  sort_option = gt_option_new_bool("sort", "sort the GFF3 features (memory "
                                   "consumption is proportional to the input "
                                   "file size(s))",
                                   &arguments->sort, false);
  gt_option_parser_add_option(op, sort_option);

  /* -strict */
  strict_option = gt_option_new_bool("strict", "be very strict during GFF3 "
                                     "parsing (stricter than the specification "
                                     "requires)", &arguments->strict, false);
  gt_option_is_development_option(strict_option);
  gt_option_parser_add_option(op, strict_option);

  /* -tidy */
  tidy_option = gt_option_new_bool("tidy", "try to tidy the GFF3 files up "
                                   "during parsing", &arguments->tidy, false);
  gt_option_parser_add_option(op, tidy_option);
  gt_option_exclude(strict_option, tidy_option);

  /* -retainids */
  option = gt_option_new_bool("retainids",
                              "when available, use the original IDs provided "
                              "in the source file\n"
                              "(memory consumption is proportional to the "
                              "input file size(s))", &arguments->retainids,
                              false);
  gt_option_parser_add_option(op, option);

  /* -checkids */
  option = gt_option_new_bool("checkids",
                              "make sure the ID attributes are unique "
                              "within the scope of each GFF3_file, as required "
                              "by GFF3 specification\n"
                              "(memory consumption is proportional to the "
                              "input file size(s))", &arguments->checkids,
                              false);
  gt_option_parser_add_option(op, option);

  /* -addids */
  option = gt_option_new_bool("addids", "add missing \""
                              GT_GFF_SEQUENCE_REGION"\" lines automatically",
                              &arguments->addids, true);
  gt_option_parser_add_option(op, option);

  /* -fixregionboundaries */
  option = gt_option_new_bool("fixregionboundaries", "automatically adjust \""
                              GT_GFF_SEQUENCE_REGION"\" lines to contain all "
                              "their features (memory consumption is "
                              "proportional to the input file size(s))",
                              &arguments->fixboundaries, false);
  gt_option_parser_add_option(op, option);

  /* -mergefeat */
  mergefeat_option = gt_option_new_bool("mergefeat",
                                        "merge adjacent features of the same "
                                        "type", &arguments->mergefeat, false);
  gt_option_is_development_option(mergefeat_option);
  gt_option_imply(mergefeat_option, sort_option);
  gt_option_parser_add_option(op, mergefeat_option);

  /* -load */
  load_option = gt_option_new_bool("load", "load the GFF3 features into memory "
                                   "(requires space proportional to the input "
                                   "file size(s))",
                                   &arguments->load, false);
  gt_option_is_development_option(load_option);
  gt_option_parser_add_option(op, load_option);

  /* -addintrons */
  addintrons_option = gt_option_new_bool("addintrons", "add intron features "
                                         "between existing exon features",
                                         &arguments->addintrons, false);
  gt_option_parser_add_option(op, addintrons_option);

  /* -offset */
  offset_option = gt_option_new_word("offset", "transform all features by the "
                                     "given offset", &arguments->offset,
                                     GT_UNDEF_WORD);
  gt_option_parser_add_option(op, offset_option);

  /* -offsetfile */
  offsetfile_option = gt_option_new_filename("offsetfile", "transform all "
                                             "features by the offsets given in "
                                             "file", arguments->offsetfile);
  gt_option_parser_add_option(op, offsetfile_option);
  gt_option_exclude(offset_option, offsetfile_option);

  /* -setsource */
  setsource_option = gt_option_new_string("setsource", "set the 'source' "
                                          "value (2nd column) of each feature",
                                          arguments->newsource, NULL);
  gt_option_parser_add_option(op, setsource_option);

  /* typecheck options */
  gt_typecheck_info_register_options(arguments->tci, op);

  /* -show */
  option = gt_option_new_bool("show", "show GFF3 output", &arguments->show,
                              true);
  gt_option_parser_add_option(op, option);

  /* -v */
  option = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(op, option);

  /* -width */
  option = gt_option_new_width(&arguments->width);
  gt_option_parser_add_option(op, option);

  /* output file options */
  gt_output_file_info_register_options(arguments->ofi, op, &arguments->outfp);

  /* set comment function */
  gt_option_parser_set_comment_func(op, gt_gtdata_show_help, NULL);

  return op;
}
/*
 * Create the option parser for the condenseq compress tool ("Compresses a
 * GtEncseq to a UniqueEncseq.").
 *
 * tool_arguments must point to a GtCondenseqCompressArguments object
 * (asserted); every option registered below stores its value in a field of
 * that object. Several numeric options use a GT_UNDEF_* default, so code
 * consuming the arguments can tell "not given" from an explicit value.
 *
 * Returns the newly created option parser.
 */
static GtOptionParser*
gt_condenseq_compress_option_parser_new(void *tool_arguments)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option,
           *option_fraction;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[options] INPUTENCSEQ",
                            "Compresses a GtEncseq to a UniqueEncseq.");

  /* -indexname */
  option = gt_option_new_string("indexname",
                                "path and basename of files to store",
                                arguments->indexname, NULL);
  gt_option_parser_add_option(op, option);

  /* -kmersize: minimum 2, default left undefined */
  option = gt_option_new_uint_min("kmersize",
                                  "kmer-size used for the seeds, default "
                                  "depends on alphabet size",
                                  &arguments->kmersize, GT_UNDEF_UINT, 2U);
  gt_option_parser_add_option(op, option);

  /* -windowsize */
  option = gt_option_new_uint("windowsize",
                              "Size of window in which to search for hit pairs "
                              "of kmers, has to be larger than kmersize" ,
                              &arguments->windowsize, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, option);

  /* -initsize */
  option = gt_option_new_uword("initsize",
                               "length of initial unique database in bases, "
                               "should be larger than -alignlength",
                               &arguments->initsize, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, option);

  /* -alignlength */
  option = gt_option_new_uword("alignlength",
                               "required minimal length of an xdrop-alignment, "
                               "should be larger than -windowsize",
                               &arguments->minalignlength, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, option);

  /* -cutoff (extended option) */
  option = gt_option_new_uword("cutoff",
                               "if a kmer is found more often than this value "
                               "it will be ignored for alignment searches. "
                               "Setting this to 0 will disable cutoffs, "
                               "leaving it undefined will use a cutoff based "
                               "on the mean number of occurrences of a k-word.",
                               &arguments->cutoff_value, GT_UNDEF_UWORD);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -fraction (extended option); `option` still refers to -cutoff here, so
     the exclusion below makes -cutoff and -fraction mutually exclusive */
  option_fraction = gt_option_new_uword("fraction",
                               "when cutoffs aren't disabled and no specific "
                               "value is set the mean number of occurrences "
                               "of each kmer divided by -fraction will be used "
                               "as cutoff",
                               &arguments->fraction, (GtUword) 2);
  gt_option_is_extended_option(option_fraction);
  gt_option_exclude(option, option_fraction);
  gt_option_parser_add_option(op, option_fraction);

  /* -disable_prune (extended option).
     NOTE(review): the value is stored in arguments->prune although the option
     is named disable_prune - confirm the polarity expected by the consumer. */
  option = gt_option_new_bool("disable_prune",
                              "when cutoffs and this option are set, "
                              "the database will still save every kmer, even "
                              "though only cutoff many kmers will be used.",
                              &arguments->prune, false);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -mat (extended option) */
  option = gt_option_new_int("mat",
                             "matchscore for extension-alignment, "
                             "requirements: mat > mis, mat > 2ins, mat > 2del",
                             &arguments->scores.mat, 2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -mis (extended option) */
  option = gt_option_new_int("mis",
                             "mismatchscore for extension-alignment",
                             &arguments->scores.mis, -1);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -ins (extended option) */
  option = gt_option_new_int("ins",
                             "insertionscore for extension-alignment",
                             &arguments->scores.ins, -2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -del (extended option) */
  option = gt_option_new_int("del",
                             "deletionscore for extension-alignment",
                             &arguments->scores.del, -2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -xdrop (extended option) */
  option = gt_option_new_word("xdrop",
                              "xdrop score for extension-alignment",
                              &arguments->xdrop, (GtWord) 3);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -brute_force (development option). The incompatibility with -diagonals
     and -full_diags named in the help text is not registered with the parser
     in this function. */
  option = gt_option_new_bool("brute_force", "disable filtering of seeds. "
                              "Incompatible with -diagonals yes "
                              "or -full_diags yes",
                              &arguments->brute, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -diagonals (development option) */
  option = gt_option_new_bool("diagonals", "use sparse diagonals. "
                              "Incompatible with -brute_force yes. "
                              "Disabling both diagonals will result in simple "
                              "filtering of seed positions.",
                              &arguments->diags, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -full_diags (development option) */
  option = gt_option_new_bool("full_diags", "use full (time efficient "
                              "space inefficient) diagonals. "
                              "Incompatible with -brute_force yes. "
                              "Disabling both diagonals will result in simple "
                              "filtering of seed positions.",
                              &arguments->full_diags, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -diags_clean (development option), stored in arguments->clean_percent */
  option = gt_option_new_uint("diags_clean",
                              "Percentage of sparse diagonals that is allowed "
                              "to be marked as deletable. Sensible default is "
                              "set." ,
                              &arguments->clean_percent, GT_UNDEF_UINT);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -verbose */
  option = gt_option_new_bool("verbose", "enable verbose output",
                              &arguments->verbose, false);
  gt_option_parser_add_option(op, option);

  /* -kdb */
  option = gt_option_new_bool("kdb", "prints out the kmer database (frequency "
                              "of each kmer), if -verbose each startposition "
                              "will be shown instead",
                              &arguments->kdb, false);
  gt_option_parser_add_option(op, option);

  return op;
}
Exemple #4
0
/*
 * Construct the option parser of the idxlocali tool ("Find all local
 * alignments using suffix tree.").
 *
 * tool_arguments must be a non-NULL pointer to an IdxlocaliOptions object.
 * Its indexname and queryfiles members are created here before the options
 * that fill them are registered. -esa and -pck both write into the same
 * indexname string; they are mutually exclusive and exactly one of them has
 * to be given. For both, the result of gt_option_ref() is stored in the
 * arguments object.
 *
 * Returns the new option parser.
 */
static GtOptionParser *gt_idxlocali_option_parser_new(void *tool_arguments)
{
  IdxlocaliOptions *arguments = tool_arguments;
  GtOptionParser *parser;
  GtOption *opt,
           *opt_esa,
           *opt_pck,
           *opt_online,
           *opt_cmp;

  gt_assert(arguments != NULL);
  arguments->indexname = gt_str_new();
  arguments->queryfiles = gt_str_array_new();

  parser = gt_option_parser_new("[options] -q query-file-names [-esa|-pck] "
                                "indexname",
                                "Find all local alignments using suffix tree.");
  gt_option_parser_set_mail_address(parser, "<*****@*****.**>");

  /* -q */
  opt = gt_option_new_filename_array("q",
                                     "Specify files containing the query "
                                     "sequences",
                                     arguments->queryfiles);
  gt_option_parser_add_option(parser, opt);

  /* scoring scheme: -match, -mismatch, -gapstart, -gapextend */
  opt = gt_option_new_word("match", "Specify match score",
                           &arguments->matchscore, 1L);
  gt_option_parser_add_option(parser, opt);

  opt = gt_option_new_word("mismatch", "Specify mismatch score",
                           &arguments->mismatchscore, -3L);
  gt_option_parser_add_option(parser, opt);

  opt = gt_option_new_word("gapstart", "Specify gap start score",
                           &arguments->gapstart, -5L);
  gt_option_parser_add_option(parser, opt);

  opt = gt_option_new_word("gapextend", "Specify gap extension score",
                           &arguments->gapextend, -2L);
  gt_option_parser_add_option(parser, opt);

  /* -th (mandatory, minimum 1) */
  opt = gt_option_new_uword_min("th", "Specify the threshold",
                                &arguments->threshold, 0, 1UL);
  gt_option_parser_add_option(parser, opt);
  gt_option_is_mandatory(opt);

  /* -esa */
  opt_esa = gt_option_new_string("esa",
                                 "Specify index (enhanced suffix array)",
                                 arguments->indexname, NULL);
  gt_option_parser_add_option(parser, opt_esa);
  arguments->refoptionesaindex = gt_option_ref(opt_esa);

  /* -pck */
  opt_pck = gt_option_new_string("pck",
                                 "Specify index (packed index)",
                                 arguments->indexname, NULL);
  gt_option_parser_add_option(parser, opt_pck);
  arguments->refoptionpckindex = gt_option_ref(opt_pck);
  gt_option_exclude(opt_esa, opt_pck);
  gt_option_is_mandatory_either(opt_esa, opt_pck);

  /* -online (development option) */
  opt_online = gt_option_new_bool("online", "Perform online searches",
                                  &arguments->doonline, false);
  gt_option_parser_add_option(parser, opt_online);
  gt_option_is_development_option(opt_online);

  /* -cmp, mutually exclusive with -online */
  opt_cmp = gt_option_new_bool("cmp",
                               "Compare results of offline and online searches",
                               &arguments->docompare, false);
  gt_option_parser_add_option(parser, opt_cmp);
  gt_option_exclude(opt_online, opt_cmp);

  /* -s */
  opt = gt_option_new_bool("s", "Show alignments",
                           &arguments->showalignment, false);
  gt_option_parser_add_option(parser, opt);

  /* -v */
  opt = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(parser, opt);

  return parser;
}