Esempio n. 1
0
/*
 * Create the option parser for the compressed bitsequence test tool.
 *
 * <tool_arguments> must be a non-NULL pointer to a GtCompressdbitsArguments;
 * every option registered here stores its parsed value in one of its fields.
 * New references to the -size, -rand and -input options are kept in
 * <size_op>, <rand_op> and <filename_op> of the arguments, and -input is
 * declared exclusive with both -size and -rand (as its help text states).
 *
 * Returns the newly created option parser.
 */
static GtOptionParser* gt_compressedbits_option_parser_new(void *tool_arguments)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...]",
                            "Testing compressed bitsequence, save to disk, "
                            "reload.");

  /* -size */
  option = gt_option_new_ulong("size",
                               "size of GtBitsequence to create "
                               "(words 32/64 bit)",
                               &arguments->size, 20UL);
  gt_option_parser_add_option(op, option);
  arguments->size_op = gt_option_ref(option);

  /* -samplerate */
  option = gt_option_new_uint("samplerate",
                              "samplerate of random GtBitsequence to test",
                              &arguments->samplerate, 32U);
  gt_option_parser_add_option(op, option);

  /* -rand */
  option = gt_option_new_bool("rand", "create random bitvector",
                              &arguments->fill_random, false);
  gt_option_parser_add_option(op, option);
  arguments->rand_op = gt_option_ref(option);

  /* -check */
  option = gt_option_new_bool("check", "compare original with compressed and "
                              "loaded from file",
                              &arguments->check_consistency, false);
  gt_option_parser_add_option(op, option);
  /* No reference is kept for -check. Storing one in rand_op would overwrite
     the reference to -rand taken above, so that -input below would exclude
     -check instead of -rand. */

  /* -input */
  option = gt_option_new_filename(
                                "input",
                                "load vector from file, format is as follows:\n"
                                "[ULL size in bits][[ULL bits]...]\n"
                                " not usable with -size and -rand",
                                arguments->filename);
  gt_option_parser_add_option(op, option);
  arguments->filename_op = gt_option_ref(option);
  gt_option_exclude(arguments->filename_op, arguments->size_op);
  gt_option_exclude(arguments->filename_op, arguments->rand_op);

  /* -benches */
  option = gt_option_new_ulong("benches",
                               "number of function calls to benchmark",
                               &arguments->benches, 100000UL);
  gt_option_parser_add_option(op, option);

  return op;
}
/*
 * Set up the command line interface of the condenseq hmmsearch tool.
 *
 * <tool_arguments> must be a non-NULL GtCondenseqHmmsearchArguments*; the
 * registered options write their values into its fields.
 *
 * Returns the freshly created option parser.
 */
static GtOptionParser*
gt_condenseq_hmmsearch_option_parser_new(void *tool_arguments)
{
  GtCondenseqHmmsearchArguments *args = tool_arguments;
  GtOptionParser *parser;
  GtOption *hmm_opt,
           *hmmsearch_opt,
           *tblout_opt,
           *force_opt,
           *max_queries_opt;

  gt_assert(args);

  parser = gt_option_parser_new(
    "[option ...] -db DATABASE -hmm HMMPROFILE",
    "Perform a hmmsearch on the given compressed "
    "database.");

  /* -db and -verbose come from the shared condenseq search options */
  gt_condenseq_search_register_options(args->csa, parser);

  /* -hmm (mandatory) */
  hmm_opt = gt_option_new_string("hmm", "hmm query", args->hmm, NULL);
  gt_option_is_mandatory(hmm_opt);
  gt_option_parser_add_option(parser, hmm_opt);

  /* -hmmsearch */
  hmmsearch_opt = gt_option_new_string(
    "hmmsearch",
    "path to hmmsearch, please set if "
    "not installed at (linux) default location",
    args->hmmsearch_path,
    "/usr/bin/hmmsearch");
  gt_option_parser_add_option(parser, hmmsearch_opt);

  /* -tblout */
  tblout_opt = gt_option_new_string(
    "tblout",
    "file basename to output tabular "
    "hmmsearch output to (like hmmer option "
    "--tblout). Depending on -max_queries will "
    "produce multiple numbered files.",
    args->outtable_filename,
    NULL);
  gt_option_parser_add_option(parser, tblout_opt);

  /* -force_ow */
  force_opt = gt_option_new_bool("force_ow",
                                 "force overwrite of existing files",
                                 &args->force_ow,
                                 false);
  gt_option_parser_add_option(parser, force_opt);

  /* -max_queries */
  max_queries_opt = gt_option_new_uint(
    "max_queries",
    "maximum number of queries per "
    "fine search, influences file-size and therefore "
    "speed!, 0 disables splitting",
    &args->max_queries,
    5U);
  gt_option_parser_add_option(parser, max_queries_opt);

  return parser;
}
/*
 * Create the option parser for the readjoiner assembly tool.
 *
 * <tool_arguments> must be a non-NULL GtReadjoinerAssemblyArguments*; each
 * option registered here stores its value in one of its fields. A new
 * reference to the -buffersize option is kept in
 * <arguments->refoptionbuffersize>.
 *
 * Option relations set up here:
 *  - -bubble, -deadend and -deadend-depth each imply -errors
 *  - -q and -v exclude each other
 *
 * The parser uses gt_readjoiner_show_version as version function and its
 * maximum number of arguments is set to 0.
 *
 * Returns the newly created option parser.
 */
static GtOptionParser* gt_readjoiner_assembly_option_parser_new(
    void *tool_arguments)
{
  GtReadjoinerAssemblyArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option, *errors_option, *deadend_option, *v_option,
           *q_option, *bubble_option, *deadend_depth_option;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...]",
      "Construct string graph and output contigs.");

  /* -readset */
  option = gt_option_new_string("readset", "specify the readset name",
      arguments->readset, NULL);
  gt_option_parser_add_option(op, option);
  gt_option_is_mandatory(option);

  /* -spmfiles */
  option = gt_option_new_uint_min("spmfiles", "number of SPM files to read\n"
      "this must be equal to the value of -j for the overlap phase",
      &arguments->nspmfiles, 1U, 1U);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -l */
  option = gt_option_new_uint_min("l", "specify the minimum SPM length",
      &arguments->minmatchlength, 0, 2U);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -depthcutoff */
  option = gt_option_new_uint_min("depthcutoff", "specify the minimal "
      "number of nodes in a contig",
      &arguments->depthcutoff, 3U, 1U);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -lengthcutoff */
  option = gt_option_new_uint_min("lengthcutoff", "specify the minimal "
      "length of a contig",
      &arguments->lengthcutoff, 100U, 1U);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -redtrans */
  option = gt_option_new_bool("redtrans", "reduce transitive edges",
      &arguments->redtrans, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -errors */
  errors_option = gt_option_new_bool("errors", "search graph features which "
      "may originate from sequencing errors and remove them",
      &arguments->errors, false);
  gt_option_is_extended_option(errors_option);
  gt_option_parser_add_option(op, errors_option);

  /* -bubble */
  bubble_option = gt_option_new_uint("bubble", "number of rounds of p-bubble "
      "removal to perform", &arguments->bubble, 3U);
  gt_option_is_extended_option(bubble_option);
  gt_option_imply(bubble_option, errors_option);
  gt_option_parser_add_option(op, bubble_option);

  /* -deadend (help text previously ended in a stray "a dead end") */
  deadend_option = gt_option_new_uint("deadend", "number of rounds of "
      "dead end removal to perform",
      &arguments->deadend, 10U);
  gt_option_is_extended_option(deadend_option);
  gt_option_imply(deadend_option, errors_option);
  gt_option_parser_add_option(op, deadend_option);

  /* -deadend-depth */
  deadend_depth_option = gt_option_new_uint_min("deadend-depth", "specify the "
      "maximal depth of a path to an end-vertex by which the path shall be "
      "considered a dead end",
      &arguments->deadend_depth, 10U, 1U);
  gt_option_is_extended_option(deadend_depth_option);
  gt_option_imply(deadend_depth_option, errors_option);
  gt_option_parser_add_option(op, deadend_depth_option);

  /* -paths2seq */
  option = gt_option_new_bool("paths2seq", "read <indexname>"
      GT_READJOINER_SUFFIX_CONTIG_PATHS " and write "
      "<indexname>" GT_READJOINER_SUFFIX_CONTIGS,
      &arguments->paths2seq, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -buffersize */
  option = gt_option_new_string("buffersize", "specify size for read buffer"
      " of paths2seq phase (in bytes, the keywords 'MB' and 'GB' are allowed)",
      arguments->buffersizearg, NULL);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);
  arguments->refoptionbuffersize = gt_option_ref(option);

  /* -vd */
  option = gt_option_new_bool("vd", "use verbose descriptions for contigs",
      &arguments->vd, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -astat */
  option = gt_option_new_bool("astat", "calculate A-statistics for each contig",
      &arguments->astat, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -cov */
  option = gt_option_new_double("cov", "average coverage value to use for the "
      "A-statistics calculation", &arguments->coverage, (double)0);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -copynum */
  option = gt_option_new_bool("copynum", "load reads copy numbers list from "
      "file for the A-statistics calculation",
      &arguments->copynum, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -v */
  v_option = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(op, v_option);

  /* -q */
  q_option = gt_option_new_bool("q", "suppress standard output messages",
      &arguments->quiet, false);
  gt_option_parser_add_option(op, q_option);
  gt_option_exclude(q_option, v_option);

  /* -load (help text previously read "save ... from file", copied from
     -save) */
  option = gt_option_new_bool("load", "load the string graph from file",
      &arguments->load, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -save */
  option = gt_option_new_bool("save", "save the string graph to file",
      &arguments->save, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -cinfo (stored in arguments->show_contigs_info) */
  option = gt_option_new_bool("cinfo", "output additional files required "
      "for contigs graph construction (eqlen only)",
      &arguments->show_contigs_info, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  gt_option_parser_set_version_func(op, gt_readjoiner_show_version);
  gt_option_parser_set_max_args(op, 0);

  return op;
}
static GtOptionParser*
gt_condenseq_compress_option_parser_new(void *tool_arguments)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option,
           *option_fraction;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[options] INPUTENCSEQ",
                            "Compresses a GtEncseq to a UniqueEncseq.");

  /* -indexname */
  option = gt_option_new_string("indexname",
                                "path and basename of files to store",
                                arguments->indexname, NULL);
  gt_option_parser_add_option(op, option);

  /* -kmersize */
  option = gt_option_new_uint_min("kmersize",
                                  "kmer-size used for the seeds, default "
                                  "depends on alphabet size",
                                  &arguments->kmersize, GT_UNDEF_UINT, 2U);
  gt_option_parser_add_option(op, option);

  /* -windowsize */
  option = gt_option_new_uint("windowsize",
                              "Size of window in which to search for hit pairs "
                              "of kmers, has to be larger than kmersize" ,
                              &arguments->windowsize, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, option);

  /* -initsize */
  option = gt_option_new_uword("initsize",
                               "length of inital unique database in bases, "
                               "should be larger than -alignlength",
                               &arguments->initsize, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, option);

  /* -alignlength */
  option = gt_option_new_uword("alignlength",
                               "required minimal length of an xdrop-alignment, "
                               "should be larger than -windowsize",
                               &arguments->minalignlength, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, option);

  /* -cutoff */
  option = gt_option_new_uword("cutoff",
                               "if a kmer is found more often than this value "
                               "it will be ignored for alignment searches. "
                               "Setting this to 0 will disable cutoffs, "
                               "leaving it undefined will use a cutoff based "
                               "on the mean number of occurences of a k-word.",
                               &arguments->cutoff_value, GT_UNDEF_UWORD);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -fraction */
  option_fraction = gt_option_new_uword("fraction",
                               "when cutoffs aren'd disabled and no specific "
                               "value is set the mean number of occurrences "
                               "of each kmer divided by -fraction will be used "
                               "as cutoff",
                               &arguments->fraction, (GtUword) 2);
  gt_option_is_extended_option(option_fraction);
  gt_option_exclude(option, option_fraction);
  gt_option_parser_add_option(op, option_fraction);

  /* -disable_prune */
  option = gt_option_new_bool("disable_prune",
                              "when cutoffs and this option are set, "
                              "the database will still save every kmer, even "
                              "though only cutoff many kmers will be used.",
                              &arguments->prune, false);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -mat */
  option = gt_option_new_int("mat",
                             "matchscore for extension-alignment, "
                             "requirements: mat > mis, mat > 2ins, mat > 2del",
                             &arguments->scores.mat, 2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -mis */
  option = gt_option_new_int("mis",
                             "mismatchscore for extension-alignment, ",
                             &arguments->scores.mis, -1);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -ins */
  option = gt_option_new_int("ins",
                             "insertionscore for extension-alignment",
                             &arguments->scores.ins, -2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -del */
  option = gt_option_new_int("del",
                             "deletionscore for extension-alignment",
                             &arguments->scores.del, -2);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -xdrop */
  option = gt_option_new_word("xdrop",
                              "xdrop score for extension-alignment",
                              &arguments->xdrop, (GtWord) 3);
  gt_option_is_extended_option(option);
  gt_option_parser_add_option(op, option);

  /* -brute_force */
  option = gt_option_new_bool("brute_force", "disable filtering of seeds. "
                              "Incompatible with -diagonals yes "
                              "or -full_diags yes",
                              &arguments->brute, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -diagonals */
  option = gt_option_new_bool("diagonals", "use sparse diagonals. "
                              "Incompatible with -brute_force yes. "
                              "Disabling both diagonals will result in simple "
                              "filtering of seed positions.",
                              &arguments->diags, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -full_diags */
  option = gt_option_new_bool("full_diags", "use full (time efficient "
                              "space inefficient) diagonals. "
                              "Incompatible with -brute_force yes. "
                              "Disabling both diagonals will result in simple "
                              "filtering of seed positions.",
                              &arguments->full_diags, false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -clean_percent */
  option = gt_option_new_uint("diags_clean",
                              "Percentage of sparse diagonals that is allowed "
                              "to be marked as deletable. Sensible default is "
                              "set." ,
                              &arguments->clean_percent, GT_UNDEF_UINT);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -verbose */
  option = gt_option_new_bool("verbose", "enable verbose output",
                              &arguments->verbose, false);
  gt_option_parser_add_option(op, option);

  /* -kdb*/
  option = gt_option_new_bool("kdb", "prints out the kmer database (frequency "
                              "of each kmer), if -verbose each startposition "
                              "will be shown instead",
                              &arguments->kdb, false);
  gt_option_parser_add_option(op, option);

  return op;
}
Esempio n. 5
0
static GtOptionParser* gt_encseq2spm_option_parser_new(void *tool_arguments)
{
    GtEncseq2spmArguments *arguments = tool_arguments;
    GtOptionParser *op;
    GtOption *option, *optionparts, *optionmemlimit;

    gt_assert(arguments);

    /* init */
    op = gt_option_parser_new("[option ...] [file]",
                              "Compute suffix prefix matches "
                              "from encoded sequence.");

    /* -l */
    option = gt_option_new_uint_min("l", "specify the minimum length",
                                    &arguments->minmatchlength, 0, 1U);
    gt_option_parser_add_option(op, option);
    gt_option_is_mandatory(option);

    /* -parts */
    optionparts = gt_option_new_uint("parts", "specify the number of parts",
                                     &arguments->numofparts, 0U);
    gt_option_parser_add_option(op, optionparts);

    /* -memlimit */
    optionmemlimit = gt_option_new_string("memlimit",
                                          "specify maximal amount of memory to be used during "
                                          "index construction (in bytes, the keywords 'MB' "
                                          "and 'GB' are allowed)",
                                          arguments->memlimitarg, NULL);
    gt_option_parser_add_option(op, optionmemlimit);
    gt_option_exclude(optionmemlimit, optionparts);
    arguments->refoptionmemlimit = gt_option_ref(optionmemlimit);

    /* -checksuftab */
    option = gt_option_new_bool("checksuftab", "check the suffix table",
                                &arguments->checksuftab, false);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -singlestrand */
    option = gt_option_new_bool("singlestrand", "use only the forward strand "
                                "of the sequence",
                                &arguments->singlestrand, false);
    gt_option_parser_add_option(op, option);

    /* -spm */
    option = gt_option_new_string("spm", "specify output for spms",
                                  arguments->spmspec, NULL);
    gt_option_parser_add_option(op, option);

    /* -ii */
    option = gt_option_new_string("ii", "specify the input sequence",
                                  arguments->encseqinput, NULL);
    gt_option_parser_add_option(op, option);
    gt_option_is_mandatory(option);

    /* -onlyaccum */
    option = gt_option_new_bool("onlyaccum", "only accumulate codes",
                                &arguments->onlyaccum, false);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -onlyallfirstcodes */
    option = gt_option_new_bool("onlyallfirstcodes", "only determines allcodes",
                                &arguments->onlyallfirstcodes, false);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -addbscachedepth */
    option = gt_option_new_uint("addbscachedepth", "only determines allcodes",
                                &arguments->addbscache_depth, 5U);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -phase2extra */
    option = gt_option_new_string("phase2extra",
                                  "specify  amount of additional space required for "
                                  "the second phase of the computation involving the "
                                  "processing of the intervals (in bytes, "
                                  "the keywords 'MB' and 'GB' are allowed)",
                                  arguments->phase2extraarg, NULL);
    gt_option_parser_add_option(op, option);
    arguments->refoptionphase2extra = gt_option_ref(option);
    gt_option_is_development_option(option);

    /* -radixlarge */
    option = gt_option_new_bool("radixlarge", "use large tables for radixsort",
                                &arguments->radixlarge, false);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -radixparts */
    option = gt_option_new_uint("radixparts", "specify the number of parts "
                                "for radixsort",
                                &arguments->radixparts, 1U);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -singlescan */
    option = gt_option_new_uint("singlescan", "run a single scan: 1=fast; "
                                "2=fast with check; 3=fast with output; "
                                "4=sfx-mapped4-version",
                                &arguments->singlescan, 0);
    gt_option_parser_add_option(op, option);
    gt_option_is_development_option(option);

    /* -forcek */
    option = gt_option_new_uint("forcek", "specify the value of k",
                                &arguments->forcek, 0);
    gt_option_is_development_option(option);
    gt_option_parser_add_option(op, option);

    option = gt_option_new_verbose(&arguments->verbose);
    gt_option_parser_add_option(op, option);

    return op;
}
Esempio n. 6
0
static GtOptionParser* gt_tir_option_parser_new(void *tool_arguments)
{
  GtTirArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *optionindex,      /* index */
           *optionseed,       /* minseedlength */
           *optionminlentir,  /* minimal length of TIR */
           *optionmaxlentir,  /* maximal length of TIR */
           *optionmindisttir, /* minimal distance of TIRs */
           *optionmaxdisttir, /* maximal distance of TIRs */
           *optionmat,        /* arbitrary scores */
           *optionmis,
           *optionins,
           *optiondel,
           *optionxdrop,      /* xdropbelowscore for extension alignment */
           *optionsimilar,    /* similarity threshold */
           *optionoverlaps,   /* for overlaps */
           *optionmintsd,     /* minimal length for Target Site Duplication */
           *optionmaxtsd,     /* maximal length for Target Site Duplication */
           *optionvicinity,   /* vicinity around TIRs to be searched for TSDs */
           *optionhmms,
           *optionevalcutoff,
           *optionpdomcutoff,
           *optionmaxgap;
  static const char *overlaps[] = {
    "best", /* default */
    "no",
    "all",
    NULL
  };
  static const char *cutoffs[] = {
    "NONE",
    "GA",
    "TC",
    NULL
  };
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] -index INDEXNAME",
                            "Identify Terminal Inverted Repeat (TIR) elements,"
                            "such as DNA transposons.");

  /* -index */
  optionindex = gt_option_new_string("index",
                                     "specify the name of the enhanced suffix "
                                     "array index (mandatory)",
                                     arguments->str_indexname, NULL);
  gt_option_is_mandatory(optionindex);
  gt_option_parser_add_option(op, optionindex);

   /* -seed */
  optionseed = gt_option_new_uword_min("seed",
                                       "specify minimum seed length for "
                                       "exact repeats",
                                       &arguments->min_seed_length, 20UL, 2UL);
  gt_option_parser_add_option(op, optionseed);

  /* -minlentir */
  optionminlentir = gt_option_new_uword_min_max("mintirlen",
                                                "specify minimum length for "
                                                "each TIR",
                                                &arguments->min_TIR_length,
                                                27UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionminlentir);

  /* -maxlentir */
  optionmaxlentir = gt_option_new_uword_min_max("maxtirlen",
                                                "specify maximum length for "
                                                "each TIR",
                                                &arguments->max_TIR_length,
                                                1000UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmaxlentir);

  /* -mindisttir */
  optionmindisttir = gt_option_new_uword_min_max("mintirdist",
                                                 "specify minimum distance of "
                                                 "TIRs",
                                                 &arguments->min_TIR_distance,
                                                 100UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmindisttir);

  /* -maxdisttir */
  optionmaxdisttir = gt_option_new_uword_min_max("maxtirdist",
                                                 "specify maximum distance of "
                                                 "TIRs",
                                                 &arguments->max_TIR_distance,
                                                 10000UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmaxdisttir);

  optionmat = gt_option_new_int_min("mat",
                                    "specify matchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mat, 2, 1);
  gt_option_parser_add_option(op, optionmat);

  /* -mis */
  optionmis = gt_option_new_int_max("mis",
                                    "specify mismatchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mis, -2, -1);
  gt_option_parser_add_option(op, optionmis);

  /* -ins */
  optionins = gt_option_new_int_max("ins",
                                    "specify insertionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.ins, -3, -1);
  gt_option_parser_add_option(op, optionins);

  /* -del */
  optiondel = gt_option_new_int_max("del",
                                    "specify deletionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.del, -3, -1);
  gt_option_parser_add_option(op, optiondel);

  /* -xdrop */
  optionxdrop = gt_option_new_int_min("xdrop",
                                      "specify xdropbelowscore for "
                                      "extension-alignment",
                                      &arguments->xdrop_belowscore, (int) 5,
                                      (int) 0);
  gt_option_parser_add_option(op, optionxdrop);

  /* -similar */
  optionsimilar = gt_option_new_double_min_max("similar",
                                               "specify similaritythreshold in "
                                               "range [1..100%]",
                                               &arguments->similarity_threshold,
                                               (double) 85.0, (double) 0.0,
                                               100.0);
  gt_option_parser_add_option(op, optionsimilar);

  /* -overlaps */
  optionoverlaps = gt_option_new_choice("overlaps", "specify no|best|all",
                                        arguments->str_overlaps,
                                        overlaps[0], overlaps);
  gt_option_parser_add_option(op, optionoverlaps);
  arguments->optionoverlaps = gt_option_ref(optionoverlaps);

  /* -mintsd */
  optionmintsd = gt_option_new_uword_min_max("mintsd",
                                             "specify minimum length for each "
                                             "TSD",
                                             &arguments->min_TSD_length,
                                             2U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmintsd);

  /* -maxtsd */
  optionmaxtsd = gt_option_new_uword_min_max("maxtsd",
                                             "specify maximum length for each "
                                             "TSD",
                                             &arguments->max_TSD_length,
                                             11U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmaxtsd);
  gt_option_imply(optionmaxtsd, optionmintsd);

  /* -vicinity */
  optionvicinity = gt_option_new_uword_min_max("vic",
                                               "specify the number of "
                                               "nucleotides (to the left and "
                                               "to the right) that will be "
                                               "searched for TSDs around 5' "
                                               "and 3' boundary of predicted "
                                               "TIRs",
                                               &arguments->vicinity,
                                               60U, 1U, 500U);
  gt_option_parser_add_option(op, optionvicinity);

  optionhmms = gt_option_new_filename_array("hmms",
                                    "profile HMM models for domain detection "
                                    "(separate by spaces, finish with --) in "
                                    "HMMER3 format\n"
                                    "Omit this option to disable pHMM search.",
                                    arguments->hmm_files);
  gt_option_parser_add_option(op, optionhmms);

  optionevalcutoff = gt_option_new_probability("pdomevalcutoff",
                                    "global E-value cutoff for pHMM search\n"
                                    "default 1E-6",
                                    &arguments->evalue_cutoff,
                                    0.000001);
  gt_option_parser_add_option(op, optionevalcutoff);
  gt_option_hide_default(optionevalcutoff);
  gt_option_imply(optionevalcutoff, optionhmms);

  optionpdomcutoff = gt_option_new_choice("pdomcutoff",
                                       "model-specific score cutoff\n"
                                       "choose from TC (trusted cutoff) | "
                                       "GA (gathering cutoff) | "
                                       "NONE (no cutoffs)",
                             arguments->cutoffs, cutoffs[1], cutoffs);
  gt_option_parser_add_option(op, optionpdomcutoff);
  gt_option_imply(optionpdomcutoff, optionhmms);

  /* XXX: make -pdomcutoff and -pdomevalcutoff mutually exclusive */

  optionmaxgap = gt_option_new_uint("maxgaplen",
                         "maximal allowed gap size between fragments (in amino "
                         "acids) when chaining pHMM hits for a protein domain",
                         &arguments->chain_max_gap_length,
                         50U);
  gt_option_parser_add_option(op, optionmaxgap);
  gt_option_is_extended_option(optionmaxgap);
  gt_option_imply(optionmaxgap, optionhmms);

  return op;
}
Esempio n. 7
0
/* Build the command line option parser for the LTRdigest tool.
   <tool_arguments> must point to a GtLTRdigestOptions structure; every option
   registered here stores its parsed value in a field of that structure.
   Options are registered in groups: output files, PPT search, PBS search,
   protein domain search, extended PBS scoring, verbosity, output file
   handling and sequence source (seqid2file) options.
   Returns the newly created option parser. */
static GtOptionParser* gt_ltrdigest_option_parser_new(void *tool_arguments)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtOptionParser *op;
  /* o:   scratch handle for options nobody refers to later
     ot:  -trnas, implied by all PBS options
     oto: -outfileprefix, implied by options that write extra files */
  GtOption *o, *ot, *oto;
  /* oh: -hmms, oc: -pdomcutoff, oeval: -pdomevalcutoff */
  GtOption *oh, *oc, *oeval;
  static const char *cutoffs[] = {"NONE", "GA", "TC", NULL};
  /* static: the range options receive pointers to these defaults */
  static GtRange pptlen_defaults           = { 8UL, 30UL},
                 uboxlen_defaults          = { 3UL, 30UL},
                 pbsalilen_defaults        = {11UL, 30UL},
                 pbsoffsetlen_defaults     = { 0UL,  5UL},
                 pbstrnaoffsetlen_defaults = { 0UL,  5UL};
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] gff3_file",
                            "Identifies and annotates sequence features in LTR "
                            "retrotransposon candidates.");

  /* Output files */

  oto = gt_option_new_string("outfileprefix",
                             "prefix for output files (e.g. 'foo' will create "
                             "files called 'foo_*.csv' and 'foo_*.fas')\n"
                             "Omit this option for GFF3 output only.",
                             arguments->prefix,
                             NULL);
  gt_option_parser_add_option(op, oto);
  gt_option_hide_default(oto);

  o = gt_option_new_bool("metadata",
                         "output metadata (run conditions) to separate file",
                         &arguments->print_metadata,
                         true);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, oto);

  o = gt_option_new_uint("seqnamelen",
                         "set maximal length of sequence names in FASTA headers"
                         " (e.g. for clustalw or similar tools)",
                         &arguments->seqnamelen,
                         20U);
  gt_option_parser_add_option(op, o);

  /* PPT search options */

  o = gt_option_new_range("pptlen",
                          "required PPT length range",
                          &arguments->ppt_len,
                          &pptlen_defaults);
  gt_option_parser_add_option(op, o);

  o = gt_option_new_range("uboxlen",
                          "required U-box length range",
                          &arguments->ubox_len,
                          &uboxlen_defaults);
  gt_option_parser_add_option(op, o);

  o = gt_option_new_uint("uboxdist",
                         "allowed U-box distance range from PPT",
                         &arguments->max_ubox_dist, 0);
  gt_option_parser_add_option(op, o);

  o = gt_option_new_uint("pptradius",
                         "radius around beginning of 3' LTR "
                         "to search for PPT",
                         &arguments->ppt_radius,
                         30U);
  gt_option_parser_add_option(op, o);

  /* emission probabilities of the PPT model, all flagged as extended */

  o = gt_option_new_probability("pptrprob",
                                "purine emission probability inside PPT",
                                &arguments->ppt_purine_prob,
                                PPT_PURINE_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("pptyprob",
                                "pyrimidine emission probability inside PPT",
                                &arguments->ppt_pyrimidine_prob,
                                PPT_PYRIMIDINE_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("pptgprob",
                                "background G emission probability outside PPT",
                                &arguments->bkg_g_prob,
                                BKG_G_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("pptcprob",
                                "background C emission probability outside PPT",
                                &arguments->bkg_c_prob,
                                BKG_C_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("pptaprob",
                                "background A emission probability outside PPT",
                                &arguments->bkg_a_prob,
                                BKG_A_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("ppttprob",
                                "background T emission probability outside PPT",
                                &arguments->bkg_t_prob,
                                BKG_T_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  o = gt_option_new_probability("pptuprob",
                                "U/T emission probability inside U-box",
                                &arguments->ubox_u_prob,
                                UBOX_U_PROB);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  /* PBS search options; each of them implies -trnas */

  ot = gt_option_new_filename("trnas",
                              "tRNA library in multiple FASTA format for PBS "
                              "detection\n"
                              "Omit this option to disable PBS search.",
                              arguments->trna_lib);
  gt_option_parser_add_option(op, ot);
  gt_option_hide_default(ot);

  o = gt_option_new_range("pbsalilen",
                          "required PBS/tRNA alignment length range",
                          &arguments->alilen,
                          &pbsalilen_defaults);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, ot);

  o = gt_option_new_range("pbsoffset",
                          "allowed PBS offset from LTR boundary range",
                          &arguments->offsetlen,
                          &pbsoffsetlen_defaults);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, ot);

  o = gt_option_new_range("pbstrnaoffset",
                          "allowed PBS/tRNA 3' end alignment offset range",
                          &arguments->trnaoffsetlen,
                          &pbstrnaoffsetlen_defaults);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, ot);

  o = gt_option_new_uint("pbsmaxedist",
                         "maximal allowed PBS/tRNA alignment unit "
                         "edit distance",
                         &arguments->max_edist,
                         1U);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, ot);

  o = gt_option_new_uint("pbsradius",
                         "radius around end of 5' LTR "
                         "to search for PBS",
                         &arguments->pbs_radius,
                         30U);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, ot);

  /* Protein domain search options; each of them implies -hmms */

  oh = gt_option_new_filename_array("hmms",
                                    "profile HMM models for domain detection "
                                    "(separate by spaces, finish with --) in "
                                    "HMMER3 format\n"
                                    "Omit this option to disable pHMM search.",
                                    arguments->hmm_files);
  gt_option_parser_add_option(op, oh);

  oeval = gt_option_new_probability("pdomevalcutoff",
                                    "global E-value cutoff for pHMM search\n"
                                    "default 1E-6",
                                    &arguments->evalue_cutoff,
                                    0.000001);
  gt_option_parser_add_option(op, oeval);
  gt_option_is_extended_option(oeval);
  /* the default is already spelled out in the description text */
  gt_option_hide_default(oeval);
  gt_option_imply(oeval, oh);

  oc = gt_option_new_choice("pdomcutoff", "model-specific score cutoff\n"
                                       "choose from TC (trusted cutoff) | "
                                       "GA (gathering cutoff) | "
                                       "NONE (no cutoffs)",
                             arguments->cutoffs, cutoffs[0], cutoffs);
  gt_option_parser_add_option(op, oc);
  /* These two calls used to target oeval a second time (copy-paste slip),
     which left -pdomcutoff without its -hmms implication. */
  gt_option_is_extended_option(oc);
  gt_option_imply(oc, oh);

  o = gt_option_new_bool("aliout",
                         "output pHMM to amino acid sequence alignments",
                         &arguments->write_alignments,
                         false);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, oh);
  gt_option_imply(o, oto);

  o = gt_option_new_bool("aaout",
                         "output amino acid sequences for protein domain "
                         "hits",
                         &arguments->write_aaseqs,
                         false);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, oh);
  gt_option_imply(o, oto);

  o = gt_option_new_bool("allchains",
                           "output features from all chains and unchained "
                           "features, labeled with chain numbers",
                           &arguments->output_all_chains,
                           false);
  gt_option_parser_add_option(op, o);
  gt_option_imply(o, oh);

  o = gt_option_new_uint("maxgaplen",
                         "maximal allowed gap size between fragments (in amino "
                         "acids) when chaining pHMM hits for a protein domain",
                         &arguments->chain_max_gap_length,
                         50U);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);
  gt_option_imply(o, oh);

  o = gt_option_new_uword("threads",
                          "DEPRECATED, only included for compatibility reasons!"
                          " Use the -j parameter of the 'gt' call instead.",
                          &arguments->nthreads,
                          0);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);

  /* Extended PBS options; each of them implies -trnas */

  o = gt_option_new_int("pbsmatchscore",
                        "match score for PBS/tRNA alignments",
                        &arguments->ali_score_match,
                        5);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);
  gt_option_imply(o, ot);

  o = gt_option_new_int("pbsmismatchscore",
                        "mismatch score for PBS/tRNA alignments",
                        &arguments->ali_score_mismatch,
                        -10);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);
  gt_option_imply(o, ot);

  o = gt_option_new_int("pbsinsertionscore",
                        "insertion score for PBS/tRNA alignments",
                        &arguments->ali_score_insertion,
                        -20);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);
  gt_option_imply(o, ot);

  o = gt_option_new_int("pbsdeletionscore",
                        "deletion score for PBS/tRNA alignments",
                        &arguments->ali_score_deletion,
                        -20);
  gt_option_parser_add_option(op, o);
  gt_option_is_extended_option(o);
  gt_option_imply(o, ot);

  /* verbosity */

  o = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(op, o);

  /* output file options */

  gt_output_file_info_register_options(arguments->ofi, op, &arguments->outfp);

  /* region mapping and sequence source options */

  gt_seqid2file_register_options_ext(op, arguments->s2fi, false, false);

  return op;
}
Esempio n. 8
0
/* Create the option parser of the GenomeTools runtime itself (the top-level
   'gt' call). Every option writes into a field of <gtr>, except -j which
   writes into gt_jobs. Returns the new option parser. */
static GtOptionParser* gtr_option_parser_new(GtR *gtr)
{
  GtOptionParser *parser;
  GtOption *interactive_opt, *quiet_opt, *jobs_opt, *test_opt, *only_opt,
           *debug_opt, *debugfp_opt, *seed_opt, *check64_opt, *list_opt,
           *spacepeak_opt, *createman_opt;
  gt_assert(gtr);

  parser = gt_option_parser_new("[option ...] [tool | script] [argument ...]",
                                "The GenomeTools genome analysis system.");
  gt_option_parser_set_comment_func(parser, show_gtr_help, gtr->tools);

  /* -i */
  interactive_opt = gt_option_new_bool("i",
                                       "enter interactive mode after executing "
                                       "'tool' or 'script'",
                                       &gtr->interactive, false);
  gt_option_hide_default(interactive_opt);
  gt_option_parser_add_option(parser, interactive_opt);

  /* -q */
  quiet_opt = gt_option_new_bool("q", "suppress warnings", &gtr->quiet, false);
  gt_option_hide_default(quiet_opt);
  gt_option_parser_add_option(parser, quiet_opt);

  /* -j */
  jobs_opt = gt_option_new_uint_min("j",
                                    "set number of parallel threads used at "
                                    "once",
                                    &gt_jobs, 1, 1);
  gt_option_is_development_option(jobs_opt);
  gt_option_parser_add_option(parser, jobs_opt);

  /* -test */
  test_opt = gt_option_new_bool("test", "perform unit tests and exit",
                                &gtr->test, false);
  gt_option_hide_default(test_opt);
  gt_option_parser_add_option(parser, test_opt);

  /* -only, tied to -test */
  only_opt = gt_option_new_string("only",
                                  "perform single unit test (requires -test)",
                                  gtr->test_only, "");
  gt_option_imply(only_opt, test_opt);
  gt_option_is_development_option(only_opt);
  gt_option_hide_default(only_opt);
  gt_option_parser_add_option(parser, only_opt);

  /* -debug */
  debug_opt = gt_option_new_debug(&gtr->debug);
  gt_option_parser_add_option(parser, debug_opt);

  /* -debugfp, tied to -debug */
  debugfp_opt = gt_option_new_string("debugfp",
                                     "set file pointer for debugging output\n"
                                     "use ``stdout'' for standard output\n"
                                     "use ``stderr'' for standard error\n"
                                     "or any other string to use the "
                                     "corresponding file (will be overwritten "
                                     "without warning!)",
                                     gtr->debugfp, "stderr");
  gt_option_is_development_option(debugfp_opt);
  gt_option_parser_add_option(parser, debugfp_opt);
  gt_option_imply(debugfp_opt, debug_opt);

  /* -seed; the value currently stored in gtr->seed is passed as default */
  seed_opt = gt_option_new_uint("seed",
                                "set seed for random number generator "
                                "manually.\n"
                                "0 generates a seed from current time and "
                                "process id",
                                &gtr->seed, gtr->seed);
  gt_option_hide_default(seed_opt);
  gt_option_parser_add_option(parser, seed_opt);

  /* -64bit */
  check64_opt = gt_option_new_bool("64bit",
                                   "exit with code 0 if this is a 64bit "
                                   "binary, with 1 otherwise",
                                   &gtr->check64bit, false);
  gt_option_is_development_option(check64_opt);
  gt_option_parser_add_option(parser, check64_opt);

  /* -list */
  list_opt = gt_option_new_bool("list", "list all tools and exit", &gtr->list,
                                false);
  gt_option_is_development_option(list_opt);
  gt_option_hide_default(list_opt);
  gt_option_parser_add_option(parser, list_opt);

  /* -testspacepeak */
  spacepeak_opt = gt_option_new_filename("testspacepeak",
                                         "alloc 64 MB and mmap the given file",
                                         gtr->testspacepeak);
  gt_option_is_development_option(spacepeak_opt);
  gt_option_parser_add_option(parser, spacepeak_opt);

  /* -createman */
  createman_opt = gt_option_new_string("createman",
                                       "create man page sources in directory",
                                       gtr->manoutdir, "");
  gt_option_is_development_option(createman_opt);
  gt_option_parser_add_option(parser, createman_opt);

  return parser;
}