示例#1
0
static GtOptionParser* gt_seed_extend_option_parser_new(void *tool_arguments)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *option, *op_gre, *op_xdr, *op_cam, *op_his, *op_dif, *op_pmh,
    *op_len, *op_err, *op_xbe, *op_sup, *op_frq, *op_mem, *op_ali, *op_bia;
  gt_assert(arguments != NULL);

  /* init */
  op = gt_option_parser_new("[option ...] encseq_basename [encseq_basename]",
                            "Calculate local alignments using the seed and "
                            "extend algorithm.");

  /* DIAGBANDSEED OPTIONS */

  /* -ii */
  option = gt_option_new_string("ii",
                                "Input index for encseq encoded sequences",
                                arguments->dbs_indexname,
                                "");
  gt_option_is_mandatory(option);
  gt_option_parser_add_option(op, option);

  /* -qii */
  option = gt_option_new_string("qii",
                                "Query input index (encseq)",
                                arguments->dbs_queryname,
                                "");
  gt_option_parser_add_option(op, option);

  /* -seedlength */
  op_len = gt_option_new_uint_min_max("seedlength",
                                      "Minimum length of a seed",
                                      &arguments->dbs_seedlength,
                                      14UL, 1UL, 32UL);
  gt_option_parser_add_option(op, op_len);

  /* -diagbandwidth */
  option = gt_option_new_uword("diagbandwidth",
                               "Logarithm of diagonal band width (for filter)",
                               &arguments->dbs_logdiagbandwidth,
                               6UL);
  gt_option_parser_add_option(op, option);

  /* -mincoverage */
  option = gt_option_new_uword("mincoverage",
                               "Minimum coverage in two neighbouring diagonal "
                               "bands (for filter)",
                               &arguments->dbs_mincoverage,
                               35UL);
  gt_option_parser_add_option(op, option);

  /* -maxfreq */
  op_frq = gt_option_new_uword_min("maxfreq",
                                   "Maximum frequency of a k-mer (for filter)",
                                   &arguments->dbs_maxfreq,
                                   GT_UWORD_MAX, 1UL);
  gt_option_parser_add_option(op, op_frq);

  /* -t */
  op_sup = gt_option_new_uword_min("t",
                                   "Suppress k-mers occurring at least t times "
                                   "(for filter)",
                                   &arguments->dbs_suppress,
                                   GT_UWORD_MAX, 2UL);
  gt_option_exclude(op_sup, op_frq);
  gt_option_is_development_option(op_sup);
  gt_option_parser_add_option(op, op_sup);

  /* -memlimit */
  op_mem = gt_option_new_string("memlimit",
                                "Maximum memory usage to determine the maximum "
                                "frequency of a k-mer (for filter)",
                                arguments->dbs_memlimit_str,
                                "");
  gt_option_parser_add_option(op, op_mem);

  /* -debug-kmer */
  option = gt_option_new_bool("debug-kmer",
                              "Output KmerPos lists",
                              &arguments->dbs_debug_kmer,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -debug-seedpair */
  option = gt_option_new_bool("debug-seedpair",
                              "Output SeedPair lists",
                              &arguments->dbs_debug_seedpair,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -verify */
  option = gt_option_new_bool("verify",
                              "Check that k-mer seeds occur in the sequences",
                              &arguments->dbs_verify,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* SEED EXTENSION OPTIONS */

  /* -extendxdrop */
  op_xdr = gt_option_new_uword_min_max("extendxdrop",
                                       "Extend seed to both sides using xdrop "
                                       "algorithm, optional parameter "
                                       "specifies sensitivity",
                                       &arguments->se_extendxdrop,
                                       97UL, 90UL, 100UL);
  gt_option_argument_is_optional(op_xdr);
  gt_option_parser_add_option(op, op_xdr);
  arguments->se_option_xdrop = gt_option_ref(op_xdr);

  /* -xdropbelow */
  op_xbe = gt_option_new_word("xdropbelow",
                              "Specify xdrop cutoff score (0 means "
                              "automatically defined depending on minidentity)",
                              &arguments->se_xdropbelowscore,
                              0L);
  gt_option_imply(op_xbe, op_xdr);
  gt_option_parser_add_option(op, op_xbe);

  /* -extendgreedy */
  op_gre = gt_option_new_uword_min_max("extendgreedy",
                                       "Extend seed to both sides using greedy "
                                       "algorithm, optional parameter "
                                       "specifies sensitivity",
                                       &arguments->se_extendgreedy,
                                       97UL, 90UL, 100UL);
  gt_option_argument_is_optional(op_gre);
  gt_option_exclude(op_gre, op_xdr);
  gt_option_parser_add_option(op, op_gre);
  arguments->se_option_greedy = gt_option_ref(op_gre);

  /* -history */
  op_his = gt_option_new_uword_min_max("history",
                                       "Size of (mis)match history in range [1"
                                       "..64] (trimming for greedy extension)",
                                       &arguments->se_historysize,
                                       60UL, 1UL, 64UL);
  gt_option_imply(op_his, op_gre);
  gt_option_parser_add_option(op, op_his);

  /* -maxalilendiff */
  op_dif = gt_option_new_uword("maxalilendiff",
                               "Maximum difference of alignment length "
                               "(trimming for greedy extension)",
                               &arguments->se_maxalilendiff, 0UL);
  gt_option_imply(op_dif, op_gre);
  gt_option_is_development_option(op_dif);
  gt_option_parser_add_option(op, op_dif);

  /* -percmathistory */
  op_pmh = gt_option_new_uword_min_max("percmathistory",
                                       "percentage of matches required in "
                                       "history (for greedy extension)",
                                       &arguments->se_perc_match_hist,
                                       0UL, 1UL, 100UL);
  gt_option_imply(op_pmh, op_gre);
  gt_option_is_development_option(op_pmh);
  gt_option_parser_add_option(op, op_pmh);

  /* -bias-parameters */
  op_bia = gt_option_new_bool("bias-parameters",
                              "Use -maxalilendiff 30 and let percmathistory "
                              "depend on minidentiy and DNA base distribution",
                              &arguments->bias_parameters,
                              false);
  gt_option_imply(op_bia, op_gre);
  gt_option_exclude(op_bia, op_pmh);
  gt_option_exclude(op_bia, op_dif);
  gt_option_is_development_option(op_bia);
  gt_option_parser_add_option(op, op_bia);

  /* -cam */
  op_cam = gt_option_new_string("cam",
                                gt_cam_extendgreedy_comment(),
                                arguments->se_char_access_mode,
                                "");
  gt_option_is_development_option(op_cam);
  gt_option_parser_add_option(op, op_cam);

  /* -l */
  op_len = gt_option_new_uword_min("l",
                                   "Minimum alignment length "
                                   "(for seed extension)",
                                   &arguments->se_alignlength,
                                   20UL, 1UL);
  gt_option_imply_either_2(op_len, op_xdr, op_gre);
  gt_option_parser_add_option(op, op_len);

  /* -minidentity */
  op_err = gt_option_new_uword_min_max("minidentity",
                                       "Minimum identity of matches "
                                       "(for seed extension)",
                                       &arguments->se_minidentity,
                                       80UL, GT_EXTEND_MIN_IDENTITY_PERCENTAGE,
                                       99UL);
  gt_option_imply_either_2(op_err, op_xdr, op_gre);
  gt_option_parser_add_option(op, op_err);

  /* -a */
  op_ali = gt_option_new_uword_min("a",
                                   "show alignments/sequences (optional "
                                   "argument is number of columns per line)",
                                   &arguments->se_alignmentwidth,
                                   70, 20);
  gt_option_argument_is_optional(op_ali);
  gt_option_parser_add_option(op, op_ali);
  arguments->se_option_withali = gt_option_ref(op_ali);

  /* -mirror */
  option = gt_option_new_bool("mirror",
                              "Add reverse complement reads",
                              &arguments->mirror,
                              false);
  gt_option_parser_add_option(op, option);

  /* -overlappingseeds */
  option = gt_option_new_bool("overlappingseeds",
                              "Allow overlapping SeedPairs",
                              &arguments->overlappingseeds,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -benchmark */
  option = gt_option_new_bool("benchmark",
                              "Measure total running time and be silent",
                              &arguments->benchmark,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -seed-display */
  option = gt_option_new_bool("seed-display",
                              "Display seeds in #-line",
                              &arguments->seed_display,
                              false);
  gt_option_is_development_option(option);
  gt_option_parser_add_option(op, option);

  /* -v */
  option = gt_option_new_verbose(&arguments->verbose);
  gt_option_parser_add_option(op, option);

  return op;
}
示例#2
0
static GtOptionParser* gt_tir_option_parser_new(void *tool_arguments)
{
  GtTirArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *optionindex,      /* index */
           *optionseed,       /* minseedlength */
           *optionminlentir,  /* minimal length of TIR */
           *optionmaxlentir,  /* maximal length of TIR */
           *optionmindisttir, /* minimal distance of TIRs */
           *optionmaxdisttir, /* maximal distance of TIRs */
           *optionmat,        /* arbitrary scores */
           *optionmis,
           *optionins,
           *optiondel,
           *optionxdrop,      /* xdropbelowscore for extension alignment */
           *optionsimilar,    /* similarity threshold */
           *optionoverlaps,   /* for overlaps */
           *optionmintsd,     /* minimal length for Target Site Duplication */
           *optionmaxtsd,     /* maximal length for Target Site Duplication */
           *optionvicinity,   /* vicinity around TIRs to be searched for TSDs */
           *optionhmms,
           *optionevalcutoff,
           *optionpdomcutoff,
           *optionmaxgap;
  static const char *overlaps[] = {
    "best", /* default */
    "no",
    "all",
    NULL
  };
  static const char *cutoffs[] = {
    "NONE",
    "GA",
    "TC",
    NULL
  };
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] -index INDEXNAME",
                            "Identify Terminal Inverted Repeat (TIR) elements,"
                            "such as DNA transposons.");

  /* -index */
  optionindex = gt_option_new_string("index",
                                     "specify the name of the enhanced suffix "
                                     "array index (mandatory)",
                                     arguments->str_indexname, NULL);
  gt_option_is_mandatory(optionindex);
  gt_option_parser_add_option(op, optionindex);

   /* -seed */
  optionseed = gt_option_new_uword_min("seed",
                                       "specify minimum seed length for "
                                       "exact repeats",
                                       &arguments->min_seed_length, 20UL, 2UL);
  gt_option_parser_add_option(op, optionseed);

  /* -minlentir */
  optionminlentir = gt_option_new_uword_min_max("mintirlen",
                                                "specify minimum length for "
                                                "each TIR",
                                                &arguments->min_TIR_length,
                                                27UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionminlentir);

  /* -maxlentir */
  optionmaxlentir = gt_option_new_uword_min_max("maxtirlen",
                                                "specify maximum length for "
                                                "each TIR",
                                                &arguments->max_TIR_length,
                                                1000UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmaxlentir);

  /* -mindisttir */
  optionmindisttir = gt_option_new_uword_min_max("mintirdist",
                                                 "specify minimum distance of "
                                                 "TIRs",
                                                 &arguments->min_TIR_distance,
                                                 100UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmindisttir);

  /* -maxdisttir */
  optionmaxdisttir = gt_option_new_uword_min_max("maxtirdist",
                                                 "specify maximum distance of "
                                                 "TIRs",
                                                 &arguments->max_TIR_distance,
                                                 10000UL, 1UL, GT_UNDEF_UWORD);
  gt_option_parser_add_option(op, optionmaxdisttir);

  optionmat = gt_option_new_int_min("mat",
                                    "specify matchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mat, 2, 1);
  gt_option_parser_add_option(op, optionmat);

  /* -mis */
  optionmis = gt_option_new_int_max("mis",
                                    "specify mismatchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mis, -2, -1);
  gt_option_parser_add_option(op, optionmis);

  /* -ins */
  optionins = gt_option_new_int_max("ins",
                                    "specify insertionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.ins, -3, -1);
  gt_option_parser_add_option(op, optionins);

  /* -del */
  optiondel = gt_option_new_int_max("del",
                                    "specify deletionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.del, -3, -1);
  gt_option_parser_add_option(op, optiondel);

  /* -xdrop */
  optionxdrop = gt_option_new_int_min("xdrop",
                                      "specify xdropbelowscore for "
                                      "extension-alignment",
                                      &arguments->xdrop_belowscore, (int) 5,
                                      (int) 0);
  gt_option_parser_add_option(op, optionxdrop);

  /* -similar */
  optionsimilar = gt_option_new_double_min_max("similar",
                                               "specify similaritythreshold in "
                                               "range [1..100%]",
                                               &arguments->similarity_threshold,
                                               (double) 85.0, (double) 0.0,
                                               100.0);
  gt_option_parser_add_option(op, optionsimilar);

  /* -overlaps */
  optionoverlaps = gt_option_new_choice("overlaps", "specify no|best|all",
                                        arguments->str_overlaps,
                                        overlaps[0], overlaps);
  gt_option_parser_add_option(op, optionoverlaps);
  arguments->optionoverlaps = gt_option_ref(optionoverlaps);

  /* -mintsd */
  optionmintsd = gt_option_new_uword_min_max("mintsd",
                                             "specify minimum length for each "
                                             "TSD",
                                             &arguments->min_TSD_length,
                                             2U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmintsd);

  /* -maxtsd */
  optionmaxtsd = gt_option_new_uword_min_max("maxtsd",
                                             "specify maximum length for each "
                                             "TSD",
                                             &arguments->max_TSD_length,
                                             11U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmaxtsd);
  gt_option_imply(optionmaxtsd, optionmintsd);

  /* -vicinity */
  optionvicinity = gt_option_new_uword_min_max("vic",
                                               "specify the number of "
                                               "nucleotides (to the left and "
                                               "to the right) that will be "
                                               "searched for TSDs around 5' "
                                               "and 3' boundary of predicted "
                                               "TIRs",
                                               &arguments->vicinity,
                                               60U, 1U, 500U);
  gt_option_parser_add_option(op, optionvicinity);

  optionhmms = gt_option_new_filename_array("hmms",
                                    "profile HMM models for domain detection "
                                    "(separate by spaces, finish with --) in "
                                    "HMMER3 format\n"
                                    "Omit this option to disable pHMM search.",
                                    arguments->hmm_files);
  gt_option_parser_add_option(op, optionhmms);

  optionevalcutoff = gt_option_new_probability("pdomevalcutoff",
                                    "global E-value cutoff for pHMM search\n"
                                    "default 1E-6",
                                    &arguments->evalue_cutoff,
                                    0.000001);
  gt_option_parser_add_option(op, optionevalcutoff);
  gt_option_hide_default(optionevalcutoff);
  gt_option_imply(optionevalcutoff, optionhmms);

  optionpdomcutoff = gt_option_new_choice("pdomcutoff",
                                       "model-specific score cutoff\n"
                                       "choose from TC (trusted cutoff) | "
                                       "GA (gathering cutoff) | "
                                       "NONE (no cutoffs)",
                             arguments->cutoffs, cutoffs[1], cutoffs);
  gt_option_parser_add_option(op, optionpdomcutoff);
  gt_option_imply(optionpdomcutoff, optionhmms);

  /* XXX: make -pdomcutoff and -pdomevalcutoff mutually exclusive */

  optionmaxgap = gt_option_new_uint("maxgaplen",
                         "maximal allowed gap size between fragments (in amino "
                         "acids) when chaining pHMM hits for a protein domain",
                         &arguments->chain_max_gap_length,
                         50U);
  gt_option_parser_add_option(op, optionmaxgap);
  gt_option_is_extended_option(optionmaxgap);
  gt_option_imply(optionmaxgap, optionhmms);

  return op;
}