static GtOptionParser* gt_condenseq_compress_option_parser_new(void *tool_arguments) { GtCondenseqCompressArguments *arguments = tool_arguments; GtOptionParser *op; GtOption *option, *option_fraction; gt_assert(arguments); /* init */ op = gt_option_parser_new("[options] INPUTENCSEQ", "Compresses a GtEncseq to a UniqueEncseq."); /* -indexname */ option = gt_option_new_string("indexname", "path and basename of files to store", arguments->indexname, NULL); gt_option_parser_add_option(op, option); /* -kmersize */ option = gt_option_new_uint_min("kmersize", "kmer-size used for the seeds, default " "depends on alphabet size", &arguments->kmersize, GT_UNDEF_UINT, 2U); gt_option_parser_add_option(op, option); /* -windowsize */ option = gt_option_new_uint("windowsize", "Size of window in which to search for hit pairs " "of kmers, has to be larger than kmersize" , &arguments->windowsize, GT_UNDEF_UINT); gt_option_parser_add_option(op, option); /* -initsize */ option = gt_option_new_uword("initsize", "length of inital unique database in bases, " "should be larger than -alignlength", &arguments->initsize, GT_UNDEF_UWORD); gt_option_parser_add_option(op, option); /* -alignlength */ option = gt_option_new_uword("alignlength", "required minimal length of an xdrop-alignment, " "should be larger than -windowsize", &arguments->minalignlength, GT_UNDEF_UWORD); gt_option_parser_add_option(op, option); /* -cutoff */ option = gt_option_new_uword("cutoff", "if a kmer is found more often than this value " "it will be ignored for alignment searches. " "Setting this to 0 will disable cutoffs, " "leaving it undefined will use a cutoff based " "on the mean number of occurences of a k-word.", &arguments->cutoff_value, GT_UNDEF_UWORD); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -fraction */ option_fraction = gt_option_new_uword("fraction", "when cutoffs aren'd disabled and no specific " "value is set the mean number of occurrences " "of each kmer divided by -fraction will be used " "as cutoff", &arguments->fraction, (GtUword) 2); gt_option_is_extended_option(option_fraction); gt_option_exclude(option, option_fraction); gt_option_parser_add_option(op, option_fraction); /* -disable_prune */ option = gt_option_new_bool("disable_prune", "when cutoffs and this option are set, " "the database will still save every kmer, even " "though only cutoff many kmers will be used.", &arguments->prune, false); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -mat */ option = gt_option_new_int("mat", "matchscore for extension-alignment, " "requirements: mat > mis, mat > 2ins, mat > 2del", &arguments->scores.mat, 2); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -mis */ option = gt_option_new_int("mis", "mismatchscore for extension-alignment, ", &arguments->scores.mis, -1); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -ins */ option = gt_option_new_int("ins", "insertionscore for extension-alignment", &arguments->scores.ins, -2); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -del */ option = gt_option_new_int("del", "deletionscore for extension-alignment", &arguments->scores.del, -2); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -xdrop */ option = gt_option_new_word("xdrop", "xdrop score for extension-alignment", &arguments->xdrop, (GtWord) 3); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* -brute_force */ option = gt_option_new_bool("brute_force", "disable filtering of seeds. " "Incompatible with -diagonals yes " "or -full_diags yes", &arguments->brute, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -diagonals */ option = gt_option_new_bool("diagonals", "use sparse diagonals. " "Incompatible with -brute_force yes. " "Disabling both diagonals will result in simple " "filtering of seed positions.", &arguments->diags, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -full_diags */ option = gt_option_new_bool("full_diags", "use full (time efficient " "space inefficient) diagonals. " "Incompatible with -brute_force yes. " "Disabling both diagonals will result in simple " "filtering of seed positions.", &arguments->full_diags, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -clean_percent */ option = gt_option_new_uint("diags_clean", "Percentage of sparse diagonals that is allowed " "to be marked as deletable. Sensible default is " "set." , &arguments->clean_percent, GT_UNDEF_UINT); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -verbose */ option = gt_option_new_bool("verbose", "enable verbose output", &arguments->verbose, false); gt_option_parser_add_option(op, option); /* -kdb*/ option = gt_option_new_bool("kdb", "prints out the kmer database (frequency " "of each kmer), if -verbose each startposition " "will be shown instead", &arguments->kdb, false); gt_option_parser_add_option(op, option); return op; }
static GtOptionParser* gt_genomediff_option_parser_new(void *tool_arguments) { GtGenomediffArguments *arguments = tool_arguments; GtOptionParser *op; GtOption *option, *option_unitfile; static const char *indextypes[] = { "esa", "pck", "encseq", NULL }; gt_assert(arguments); /* init */ op = gt_option_parser_new("[option ...] " "(INDEX | -indexname NAME SEQFILE SEQFILE [...]) ", "Calculates Kr: pairwise distances between genomes."); /* options */ option = gt_option_new_choice("indextype", "specify type of index, one of: " "esa|pck|encseq. Where encseq is an encoded " "sequence and an enhanced suffix array will be " "constructed only in memory.", arguments->indextype, indextypes[2], indextypes); gt_option_parser_add_option(op, option); option = gt_option_new_string("indexname", "Basename of encseq to construct.", arguments->indexname, NULL); gt_option_parser_add_option(op, option); /*-unitfile*/ option_unitfile = gt_option_new_filename("unitfile", "specifies genomic units, " "see below for description.", arguments->unitfile); gt_option_parser_add_option(op, option_unitfile); arguments->ref_unitfile = gt_option_ref(option_unitfile); /* encseq options */ arguments->loadopts = gt_encseq_options_register_loading(op, arguments->indexname); gt_option_is_development_option( gt_encseq_options_lossless_option(arguments->loadopts)); /* esa options */ arguments->idxopts = gt_index_options_register_esa_noout(op); gt_option_is_development_option( gt_index_options_spmopt_option(arguments->idxopts)); /* scan */ option = gt_option_new_bool("scan", "do not load esa index but scan " "it sequentially.", &arguments->scanfile, true); gt_option_is_extended_option(option); gt_option_parser_add_option(op, option); /* dev options */ /* -max_n */ option = gt_option_new_uword("max_n", "Number of precalculated values " "for ln(n!) and pmax(x).", &arguments->max_ln_n_fac, 1000UL); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -maxdepth */ option = gt_option_new_int("maxdepth", "max depth of .pbi-file, use with " "-indextype pck.", &arguments->user_max_depth, -1); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* thresholds */ /* divergence error */ option = gt_option_new_double("thr", "Threshold for difference (du, dl) in " "divergence calculation.\n" "default: 1e-9", &arguments->divergence_threshold, 1e-9); gt_option_is_extended_option(option); gt_option_hide_default(option); gt_option_parser_add_option(op, option); /* expected shulen error */ option = gt_option_new_double("abs_err", "absolute error for expected shulen " "calculation.\n" "default: 1e-5", &arguments->divergence_abs_err, 1e-5); gt_option_is_extended_option(option); gt_option_hide_default(option); gt_option_parser_add_option(op, option); /* relative expected shulen error */ option = gt_option_new_double("rel_err", "relative error for expected shulen " "calculation.\n" "default: 1e-3", &arguments->divergence_rel_err, 1e-3); gt_option_is_extended_option(option); gt_option_hide_default(option); gt_option_parser_add_option(op, option); /* M */ option = gt_option_new_double("M", "threshold for minimum logarithm.\n" "default: DBL_MIN", &arguments->divergence_m, DBL_MIN); gt_option_is_extended_option(option); gt_option_hide_default(option); gt_option_parser_add_option(op, option); /* -v */ option = gt_option_new_verbose(&arguments->verbose); gt_option_parser_add_option(op, option); /* mail */ gt_option_parser_set_mail_address(op, "<*****@*****.**>"); /* doc */ gt_option_parser_set_comment_func(op, gt_gtdata_show_help, NULL); return op; }
static GtOptionParser* gt_ltrdigest_option_parser_new(void *tool_arguments) { GtLTRdigestOptions *arguments = tool_arguments; GtOptionParser *op; GtOption *o, *ot, *oto; GtOption *oh, *oc, *oeval; static const char *cutoffs[] = {"NONE", "GA", "TC", NULL}; static GtRange pptlen_defaults = { 8UL, 30UL}, uboxlen_defaults = { 3UL, 30UL}, pbsalilen_defaults = {11UL, 30UL}, pbsoffsetlen_defaults = { 0UL, 5UL}, pbstrnaoffsetlen_defaults = { 0UL, 5UL}; gt_assert(arguments); /* init */ op = gt_option_parser_new("[option ...] gff3_file", "Identifies and annotates sequence features in LTR " "retrotransposon candidates."); /* Output files */ oto = gt_option_new_string("outfileprefix", "prefix for output files (e.g. 'foo' will create " "files called 'foo_*.csv' and 'foo_*.fas')\n" "Omit this option for GFF3 output only.", arguments->prefix, NULL); gt_option_parser_add_option(op, oto); gt_option_hide_default(oto); o = gt_option_new_bool("metadata", "output metadata (run conditions) to separate file", &arguments->print_metadata, true); gt_option_parser_add_option(op, o); gt_option_imply(o, oto); o = gt_option_new_uint("seqnamelen", "set maximal length of sequence names in FASTA headers" " (e.g. for clustalw or similar tools)", &arguments->seqnamelen, 20U); gt_option_parser_add_option(op, o); /* PPT search options */ o = gt_option_new_range("pptlen", "required PPT length range", &arguments->ppt_len, &pptlen_defaults); gt_option_parser_add_option(op, o); o = gt_option_new_range("uboxlen", "required U-box length range", &arguments->ubox_len, &uboxlen_defaults); gt_option_parser_add_option(op, o); o = gt_option_new_uint("uboxdist", "allowed U-box distance range from PPT", &arguments->max_ubox_dist, 0); gt_option_parser_add_option(op, o); o = gt_option_new_uint("pptradius", "radius around beginning of 3' LTR " "to search for PPT", &arguments->ppt_radius, 30U); gt_option_parser_add_option(op, o); o = gt_option_new_probability("pptrprob", "purine emission probability inside PPT", &arguments->ppt_purine_prob, PPT_PURINE_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("pptyprob", "pyrimidine emission probability inside PPT", &arguments->ppt_pyrimidine_prob, PPT_PYRIMIDINE_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("pptgprob", "background G emission probability outside PPT", &arguments->bkg_g_prob, BKG_G_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("pptcprob", "background C emission probability outside PPT", &arguments->bkg_c_prob, BKG_C_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("pptaprob", "background A emission probability outside PPT", &arguments->bkg_a_prob, BKG_A_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("ppttprob", "background T emission probability outside PPT", &arguments->bkg_t_prob, BKG_T_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); o = gt_option_new_probability("pptuprob", "U/T emission probability inside U-box", &arguments->ubox_u_prob, UBOX_U_PROB); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); /* PBS search options */ ot = gt_option_new_filename("trnas", "tRNA library in multiple FASTA format for PBS " "detection\n" "Omit this option to disable PBS search.", arguments->trna_lib); gt_option_parser_add_option(op, ot); gt_option_hide_default(ot); o = gt_option_new_range("pbsalilen", "required PBS/tRNA alignment length range", &arguments->alilen, &pbsalilen_defaults); gt_option_parser_add_option(op, o); gt_option_imply(o, ot); o = gt_option_new_range("pbsoffset", "allowed PBS offset from LTR boundary range", &arguments->offsetlen, &pbsoffsetlen_defaults); gt_option_parser_add_option(op, o); gt_option_imply(o, ot); o = gt_option_new_range("pbstrnaoffset", "allowed PBS/tRNA 3' end alignment offset range", &arguments->trnaoffsetlen, &pbstrnaoffsetlen_defaults); gt_option_parser_add_option(op, o); gt_option_imply(o, ot); o = gt_option_new_uint("pbsmaxedist", "maximal allowed PBS/tRNA alignment unit " "edit distance", &arguments->max_edist, 1U); gt_option_parser_add_option(op, o); gt_option_imply(o, ot); o = gt_option_new_uint("pbsradius", "radius around end of 5' LTR " "to search for PBS", &arguments->pbs_radius, 30U); gt_option_parser_add_option(op, o); gt_option_imply(o, ot); /* Protein domain search options */ oh = gt_option_new_filename_array("hmms", "profile HMM models for domain detection " "(separate by spaces, finish with --) in " "HMMER3 format\n" "Omit this option to disable pHMM search.", arguments->hmm_files); gt_option_parser_add_option(op, oh); oeval = gt_option_new_probability("pdomevalcutoff", "global E-value cutoff for pHMM search\n" "default 1E-6", &arguments->evalue_cutoff, 0.000001); gt_option_parser_add_option(op, oeval); gt_option_is_extended_option(oeval); gt_option_hide_default(oeval); gt_option_imply(oeval, oh); oc = gt_option_new_choice("pdomcutoff", "model-specific score cutoff\n" "choose from TC (trusted cutoff) | " "GA (gathering cutoff) | " "NONE (no cutoffs)", arguments->cutoffs, cutoffs[0], cutoffs); gt_option_parser_add_option(op, oc); gt_option_is_extended_option(oeval); gt_option_imply(oeval, oh); o = gt_option_new_bool("aliout", "output pHMM to amino acid sequence alignments", &arguments->write_alignments, false); gt_option_parser_add_option(op, o); gt_option_imply(o, oh); gt_option_imply(o, oto); o = gt_option_new_bool("aaout", "output amino acid sequences for protein domain " "hits", &arguments->write_aaseqs, false); gt_option_parser_add_option(op, o); gt_option_imply(o, oh); gt_option_imply(o, oto); o = gt_option_new_bool("allchains", "output features from all chains and unchained " "features, labeled with chain numbers", &arguments->output_all_chains, false); gt_option_parser_add_option(op, o); gt_option_imply(o, oh); o = gt_option_new_uint("maxgaplen", "maximal allowed gap size between fragments (in amino " "acids) when chaining pHMM hits for a protein domain", &arguments->chain_max_gap_length, 50U); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); gt_option_imply(o, oh); o = gt_option_new_uword("threads", "DEPRECATED, only included for compatibility reasons!" " Use the -j parameter of the 'gt' call instead.", &arguments->nthreads, 0); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); /* Extended PBS options */ o = gt_option_new_int("pbsmatchscore", "match score for PBS/tRNA alignments", &arguments->ali_score_match, 5); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); gt_option_imply(o, ot); o = gt_option_new_int("pbsmismatchscore", "mismatch score for PBS/tRNA alignments", &arguments->ali_score_mismatch, -10); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); gt_option_imply(o, ot); o = gt_option_new_int("pbsinsertionscore", "insertion score for PBS/tRNA alignments", &arguments->ali_score_insertion, -20); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); gt_option_imply(o, ot); o = gt_option_new_int("pbsdeletionscore", "deletion score for PBS/tRNA alignments", &arguments->ali_score_deletion, -20); gt_option_parser_add_option(op, o); gt_option_is_extended_option(o); gt_option_imply(o, ot); /* verbosity */ o = gt_option_new_verbose(&arguments->verbose); gt_option_parser_add_option(op, o); /* output file options */ gt_output_file_info_register_options(arguments->ofi, op, &arguments->outfp); /* region mapping and sequence source options */ gt_seqid2file_register_options_ext(op, arguments->s2fi, false, false); return op; }