static GtOptionParser* gt_kmer_database_option_parser_new(void *tool_arguments) { GtKmerDatabaseArguments *arguments = tool_arguments; GtOptionParser *op; GtOption *option, *option_verbose, *option_use_cutoff, *option_hash, *option_mean_cutoff; gt_assert(arguments); /* init */ op = gt_option_parser_new("[option ...] [file]", "Makes a GtKmerDatabase from the input file."); /* -kmersize */ option = gt_option_new_uint_min_max("kmersize", "kmersize used", &arguments->kmersize, 3U, 1U, 10U); gt_option_parser_add_option(op, option); /* -verbose */ option_verbose = gt_option_new_bool("verbose", "prints out results of " "merging", &arguments->verbose, false); gt_option_parser_add_option(op, option_verbose); /* -merge_only */ option = gt_option_new_bool("merge_only", "only uses merge to build DB, " "doesn_t build two DBs to compare merge with a " "different method (much faster). It also allows " "for random intervals which are biffer than the " "maximum buffer size (will be split internally).", &arguments->merge_only, false); gt_option_parser_add_option(op, option); /* -use_cutoff */ option_use_cutoff = gt_option_new_bool("use_cutoff", "uses a cutoff. see " "-set_cutoff description. 
Only works " "with merge_only", &arguments->cutoff, false); gt_option_parser_add_option(op, option_use_cutoff); gt_option_imply(option_use_cutoff, option); /* -set_cutoff */ option = gt_option_new_uword_min("set_cutoff", "kmers occuring more often " "than this value won't be saved", &arguments->cutoff_value, (GtUword) 30, (GtUword) 1); gt_option_parser_add_option(op, option); gt_option_imply(option, option_use_cutoff); /* -mean_cutoff */ option_mean_cutoff = gt_option_new_bool("mean_cutoff", "2*mean of kmer " "occurrence will be" " used as cutoff value", &arguments->mean_cutoff, false); gt_option_parser_add_option(op, option_mean_cutoff); gt_option_imply(option_mean_cutoff, option_use_cutoff); gt_option_exclude(option_mean_cutoff, option); /* -disable_prune */ option = gt_option_new_bool("disable_prune", "disables the removel of kmers, " "which occure more often than the cutoff.", &arguments->prune, false); gt_option_parser_add_option(op, option); gt_option_imply(option, option_use_cutoff); /* -use_hash */ option_hash = gt_option_new_bool("use_hash", "saves each kmer in kdb and " "also in a hash. afterwards both will be " "accessed and time for saving and " "accessing will be shown.", &arguments->use_hash, false); gt_option_parser_add_option(op, option_hash); gt_option_exclude(option_hash, option_use_cutoff); gt_option_exclude(option_hash, option_verbose); /* -benchmark */ option = gt_option_new_bool("benchmark", "measures the time the tool takes to" " fill the database. Doesn't test for consistency" " though!", &arguments->bench, false); gt_option_parser_add_option(op, option); /* -bsize */ option = gt_option_new_uword_min("bsize", "size of the buffer", &arguments->sb_size, (GtUword) 100000, (GtUword) 2); gt_option_parser_add_option(op, option); gt_option_exclude(option, option_hash); /* -outfile */ option = gt_option_new_string("outfile", "specifies file for verbose output", arguments->print_filename, NULL); gt_option_parser_add_option(op, option); return op; }
static GtOptionParser* gt_seed_extend_option_parser_new(void *tool_arguments) { GtSeedExtendArguments *arguments = tool_arguments; GtOptionParser *op; GtOption *option, *op_gre, *op_xdr, *op_cam, *op_his, *op_dif, *op_pmh, *op_len, *op_err, *op_xbe, *op_sup, *op_frq, *op_mem, *op_ali, *op_bia; gt_assert(arguments != NULL); /* init */ op = gt_option_parser_new("[option ...] encseq_basename [encseq_basename]", "Calculate local alignments using the seed and " "extend algorithm."); /* DIAGBANDSEED OPTIONS */ /* -ii */ option = gt_option_new_string("ii", "Input index for encseq encoded sequences", arguments->dbs_indexname, ""); gt_option_is_mandatory(option); gt_option_parser_add_option(op, option); /* -qii */ option = gt_option_new_string("qii", "Query input index (encseq)", arguments->dbs_queryname, ""); gt_option_parser_add_option(op, option); /* -seedlength */ op_len = gt_option_new_uint_min_max("seedlength", "Minimum length of a seed", &arguments->dbs_seedlength, 14UL, 1UL, 32UL); gt_option_parser_add_option(op, op_len); /* -diagbandwidth */ option = gt_option_new_uword("diagbandwidth", "Logarithm of diagonal band width (for filter)", &arguments->dbs_logdiagbandwidth, 6UL); gt_option_parser_add_option(op, option); /* -mincoverage */ option = gt_option_new_uword("mincoverage", "Minimum coverage in two neighbouring diagonal " "bands (for filter)", &arguments->dbs_mincoverage, 35UL); gt_option_parser_add_option(op, option); /* -maxfreq */ op_frq = gt_option_new_uword_min("maxfreq", "Maximum frequency of a k-mer (for filter)", &arguments->dbs_maxfreq, GT_UWORD_MAX, 1UL); gt_option_parser_add_option(op, op_frq); /* -t */ op_sup = gt_option_new_uword_min("t", "Suppress k-mers occurring at least t times " "(for filter)", &arguments->dbs_suppress, GT_UWORD_MAX, 2UL); gt_option_exclude(op_sup, op_frq); gt_option_is_development_option(op_sup); gt_option_parser_add_option(op, op_sup); /* -memlimit */ op_mem = gt_option_new_string("memlimit", "Maximum memory usage to determine 
the maximum " "frequency of a k-mer (for filter)", arguments->dbs_memlimit_str, ""); gt_option_parser_add_option(op, op_mem); /* -debug-kmer */ option = gt_option_new_bool("debug-kmer", "Output KmerPos lists", &arguments->dbs_debug_kmer, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -debug-seedpair */ option = gt_option_new_bool("debug-seedpair", "Output SeedPair lists", &arguments->dbs_debug_seedpair, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -verify */ option = gt_option_new_bool("verify", "Check that k-mer seeds occur in the sequences", &arguments->dbs_verify, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* SEED EXTENSION OPTIONS */ /* -extendxdrop */ op_xdr = gt_option_new_uword_min_max("extendxdrop", "Extend seed to both sides using xdrop " "algorithm, optional parameter " "specifies sensitivity", &arguments->se_extendxdrop, 97UL, 90UL, 100UL); gt_option_argument_is_optional(op_xdr); gt_option_parser_add_option(op, op_xdr); arguments->se_option_xdrop = gt_option_ref(op_xdr); /* -xdropbelow */ op_xbe = gt_option_new_word("xdropbelow", "Specify xdrop cutoff score (0 means " "automatically defined depending on minidentity)", &arguments->se_xdropbelowscore, 0L); gt_option_imply(op_xbe, op_xdr); gt_option_parser_add_option(op, op_xbe); /* -extendgreedy */ op_gre = gt_option_new_uword_min_max("extendgreedy", "Extend seed to both sides using greedy " "algorithm, optional parameter " "specifies sensitivity", &arguments->se_extendgreedy, 97UL, 90UL, 100UL); gt_option_argument_is_optional(op_gre); gt_option_exclude(op_gre, op_xdr); gt_option_parser_add_option(op, op_gre); arguments->se_option_greedy = gt_option_ref(op_gre); /* -history */ op_his = gt_option_new_uword_min_max("history", "Size of (mis)match history in range [1" "..64] (trimming for greedy extension)", &arguments->se_historysize, 60UL, 1UL, 64UL); 
gt_option_imply(op_his, op_gre); gt_option_parser_add_option(op, op_his); /* -maxalilendiff */ op_dif = gt_option_new_uword("maxalilendiff", "Maximum difference of alignment length " "(trimming for greedy extension)", &arguments->se_maxalilendiff, 0UL); gt_option_imply(op_dif, op_gre); gt_option_is_development_option(op_dif); gt_option_parser_add_option(op, op_dif); /* -percmathistory */ op_pmh = gt_option_new_uword_min_max("percmathistory", "percentage of matches required in " "history (for greedy extension)", &arguments->se_perc_match_hist, 0UL, 1UL, 100UL); gt_option_imply(op_pmh, op_gre); gt_option_is_development_option(op_pmh); gt_option_parser_add_option(op, op_pmh); /* -bias-parameters */ op_bia = gt_option_new_bool("bias-parameters", "Use -maxalilendiff 30 and let percmathistory " "depend on minidentiy and DNA base distribution", &arguments->bias_parameters, false); gt_option_imply(op_bia, op_gre); gt_option_exclude(op_bia, op_pmh); gt_option_exclude(op_bia, op_dif); gt_option_is_development_option(op_bia); gt_option_parser_add_option(op, op_bia); /* -cam */ op_cam = gt_option_new_string("cam", gt_cam_extendgreedy_comment(), arguments->se_char_access_mode, ""); gt_option_is_development_option(op_cam); gt_option_parser_add_option(op, op_cam); /* -l */ op_len = gt_option_new_uword_min("l", "Minimum alignment length " "(for seed extension)", &arguments->se_alignlength, 20UL, 1UL); gt_option_imply_either_2(op_len, op_xdr, op_gre); gt_option_parser_add_option(op, op_len); /* -minidentity */ op_err = gt_option_new_uword_min_max("minidentity", "Minimum identity of matches " "(for seed extension)", &arguments->se_minidentity, 80UL, GT_EXTEND_MIN_IDENTITY_PERCENTAGE, 99UL); gt_option_imply_either_2(op_err, op_xdr, op_gre); gt_option_parser_add_option(op, op_err); /* -a */ op_ali = gt_option_new_uword_min("a", "show alignments/sequences (optional " "argument is number of columns per line)", &arguments->se_alignmentwidth, 70, 20); 
gt_option_argument_is_optional(op_ali); gt_option_parser_add_option(op, op_ali); arguments->se_option_withali = gt_option_ref(op_ali); /* -mirror */ option = gt_option_new_bool("mirror", "Add reverse complement reads", &arguments->mirror, false); gt_option_parser_add_option(op, option); /* -overlappingseeds */ option = gt_option_new_bool("overlappingseeds", "Allow overlapping SeedPairs", &arguments->overlappingseeds, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -benchmark */ option = gt_option_new_bool("benchmark", "Measure total running time and be silent", &arguments->benchmark, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -seed-display */ option = gt_option_new_bool("seed-display", "Display seeds in #-line", &arguments->seed_display, false); gt_option_is_development_option(option); gt_option_parser_add_option(op, option); /* -v */ option = gt_option_new_verbose(&arguments->verbose); gt_option_parser_add_option(op, option); return op; }
/* Parse and validate the command line options of the LTR retrotransposon
   prediction tool.

   parsed_args  passed through to gt_option_parser_parse()
   lo           option record that is filled in; its GtStr members
                (str_indexname, motif.str_motif, str_overlaps,
                str_fastaoutputfilename, str_fastaoutputfilenameinnerregion,
                str_gff3filename) are created here with gt_str_new() and are
                NOT deleted by this function
   argc, argv   the command line
   err          receives the error message if parsing or validation fails

   Returns the value delivered by gt_option_parser_parse(), or
   OPTIONPARSER_ERROR if parsing succeeded but one of the consistency checks
   performed afterwards failed. If several consistency checks fail, each one
   calls gt_error_set() in turn. */
static OPrval parse_options(int *parsed_args,
                            LTRharvestoptions *lo,
                            int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *optionindex,
           *optionltrsearchseqrange,
           *optionseed,
           *optionminlenltr,
           *optionmaxlenltr,
           *optionmindistltr,
           *optionmaxdistltr,
           *optionmintsd,
           *optionmaxtsd,
           *optionsimilar,
           *optionmotif,
           *optionmotifmis,
           *optionvic,
           *optionoverlaps,
           *optionxdrop,
           *optionmat,
           *optionmis,
           *optionins,
           *optiondel,
           *optionv,
           *optionoffset,
           *optionlongoutput,
           *optionout,
           *optionoutinner,
           *optiongff3;
  OPrval oprval;
  GtRange default_ltrsearchseqrange = {0,0};
  unsigned int vicinityforcorrectboundaries;

  static const char *overlaps[] = {
    "best", /* the default */
    "no",
    "all",
    NULL
  };

  gt_error_check(err);
  op = gt_option_parser_new("[option ...] -index filenameindex",
                            "Predict LTR retrotransposons.");

  /* -index */
  lo->str_indexname = gt_str_new();
  optionindex = gt_option_new_string("index",
                             "specify the name of the enhanced suffix "
                             "array index (mandatory)",
                             lo->str_indexname, NULL);
  gt_option_is_mandatory(optionindex);
  gt_option_parser_add_option(op, optionindex);

  /* -range */
  optionltrsearchseqrange
    = gt_option_new_range("range",
                          "specify sequence range in which LTRs are searched",
                          &lo->repeatinfo.ltrsearchseqrange,
                          &default_ltrsearchseqrange);
  gt_option_parser_add_option(op, optionltrsearchseqrange);

  /* -seed */
  optionseed = gt_option_new_ulong_min("seed",
                               "specify minimum seed length for"
                               " exact repeats",
                               &lo->minseedlength,
                               30UL,
                               1UL);
  gt_option_parser_add_option(op, optionseed);

  /* -minlenltr */
  optionminlenltr = gt_option_new_ulong_min_max("minlenltr",
                               "specify minimum length for each LTR",
                               &lo->repeatinfo.lmin,
                               100UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionminlenltr);

  /* -maxlenltr */
  optionmaxlenltr = gt_option_new_ulong_min_max("maxlenltr",
                               "specify maximum length for each LTR",
                               &lo->repeatinfo.lmax,
                               1000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxlenltr);

  /* -mindistltr */
  optionmindistltr = gt_option_new_ulong_min_max("mindistltr",
                               "specify minimum distance of "
                               "LTR startpositions",
                               &lo->repeatinfo.dmin,
                               1000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmindistltr);

  /* -maxdistltr */
  optionmaxdistltr = gt_option_new_ulong_min_max("maxdistltr",
                               "specify maximum distance of "
                               "LTR startpositions",
                               &lo->repeatinfo.dmax,
                               15000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxdistltr);

  /* -similar */
  optionsimilar = gt_option_new_double_min_max("similar",
                               "specify similaritythreshold in "
                               "range [1..100%]",
                               &lo->similaritythreshold,
                               (double) 85.0,
                               (double) 0.0,
                               100.0);
  gt_option_parser_add_option(op, optionsimilar);

  /* -mintsd */
  optionmintsd = gt_option_new_uint_min_max("mintsd",
                               "specify minimum length for each TSD",
                               &lo->minlengthTSD,
                               4U,
                               0,
                               GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmintsd);

  /* -maxtsd */
  optionmaxtsd = gt_option_new_uint_min_max("maxtsd",
                               "specify maximum length for each TSD",
                               &lo->maxlengthTSD,
                               20U,
                               0,
                               GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmaxtsd);

  /* -motif */
  /* characters will be transformed later
     into characters from virtualtree alphabet */
  lo->motif.firstleft   = (GtUchar) 't';
  lo->motif.secondleft  = (GtUchar) 'g';
  lo->motif.firstright  = (GtUchar) 'c';
  lo->motif.secondright = (GtUchar) 'a';
  lo->motif.str_motif = gt_str_new();
  optionmotif = gt_option_new_string("motif",
                             "specify 2 nucleotides startmotif + "
                             "2 nucleotides endmotif: ****",
                             lo->motif.str_motif, NULL);
  gt_option_parser_add_option(op, optionmotif);

  /* -motifmis */
  /* NOTE(review): the default 4 lies outside the accepted range [0,3];
     presumably a sentinel for "-motif not used" - confirm against callers */
  optionmotifmis = gt_option_new_uint_min_max("motifmis",
                               "specify maximum number of "
                               "mismatches in motif [0,3]",
                               &lo->motif.allowedmismatches,
                               4U,
                               0,
                               3U);
  gt_option_parser_add_option(op, optionmotifmis);

  /* -vic */
  optionvic = gt_option_new_uint_min_max("vic",
                        "specify the number of nucleotides (to the left and "
                        "to the right) that will be searched "
                        "for TSDs and/or motifs around 5' and 3' boundary "
                        "of predicted LTR retrotransposons",
                        &vicinityforcorrectboundaries,
                        60U,
                        1U,
                        500U);
  gt_option_parser_add_option(op, optionvic);

  /* -overlaps */
  lo->str_overlaps = gt_str_new();
  optionoverlaps = gt_option_new_choice("overlaps",
                                        "specify no|best|all",
                                        lo->str_overlaps,
                                        overlaps[0],
                                        overlaps);
  gt_option_parser_add_option(op, optionoverlaps);

  /* -xdrop */
  optionxdrop = gt_option_new_int_min("xdrop",
                        "specify xdropbelowscore for extension-alignment",
                        &lo->xdropbelowscore,
                        (int)5,
                        (int)0);
  gt_option_parser_add_option(op, optionxdrop);

  /* -mat */
  lo->arbitscores.gcd = 1; /* set only for initialization, do not change! */
  optionmat = gt_option_new_int_min("mat",
                        "specify matchscore for extension-alignment",
                        &lo->arbitscores.mat,
                        2,
                        1);
  gt_option_parser_add_option(op, optionmat);

  /* -mis */
  optionmis = gt_option_new_int_max("mis",
                        "specify mismatchscore for extension-alignment",
                        &lo->arbitscores.mis,
                        -2,
                        -1);
  gt_option_parser_add_option(op, optionmis);

  /* -ins */
  optionins = gt_option_new_int_max("ins",
                        "specify insertionscore for extension-alignment",
                        &lo->arbitscores.ins,
                        -3,
                        -1);
  gt_option_parser_add_option(op, optionins);

  /* -del */
  optiondel = gt_option_new_int_max("del",
                        "specify deletionscore for extension-alignment",
                        &lo->arbitscores.del,
                        -3,
                        -1);
  gt_option_parser_add_option(op, optiondel);

  /* -v */
  optionv = gt_option_new_bool("v",
                               "verbose mode",
                               &lo->verbosemode,
                               false);
  gt_option_parser_add_option(op, optionv);

  /* -longoutput */
  optionlongoutput = gt_option_new_bool("longoutput",
                                        "additional motif/TSD output",
                                        &lo->longoutput,
                                        false);
  gt_option_parser_add_option(op, optionlongoutput);

  /* -out */
  lo->fastaoutput = false;      /* by default no FASTA output */
  lo->str_fastaoutputfilename = gt_str_new();
  optionout = gt_option_new_string("out",
                                   "specify FASTA outputfilename",
                                   lo->str_fastaoutputfilename, NULL);
  gt_option_parser_add_option(op, optionout);

  /* -outinner */
  lo->fastaoutputinnerregion = false;
  lo->str_fastaoutputfilenameinnerregion = gt_str_new();
  optionoutinner = gt_option_new_string("outinner",
                             "specify FASTA outputfilename for inner regions",
                             lo->str_fastaoutputfilenameinnerregion, NULL);
  gt_option_parser_add_option(op, optionoutinner);

  /* -gff3 */
  lo->gff3output = false;       /* by default no gff3 output */
  lo->str_gff3filename = gt_str_new();
  optiongff3 = gt_option_new_string("gff3",
                                    "specify GFF3 outputfilename",
                                    lo->str_gff3filename, NULL);
  gt_option_parser_add_option(op, optiongff3);

  /* -offset */
  optionoffset = gt_option_new_ulong("offset",
                                     "offset added to GFF3 coordinates",
                                     &lo->offset,
                                     0UL);
  gt_option_parser_add_option(op, optionoffset);
  gt_option_is_extended_option(optionoffset);

  /* implications */
  gt_option_imply(optionmaxtsd, optionmintsd);
  gt_option_imply(optionmotifmis, optionmotif);

  gt_option_imply_either_2(optionlongoutput, optionmintsd, optionmotif);

  gt_option_parser_refer_to_manual(op);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv,
                                  gt_versionfunc, err);
  lo->vicinityforcorrectboundaries = (Seqpos) vicinityforcorrectboundaries;

  if (oprval == OPTIONPARSER_OK)
  {
    if (lo->repeatinfo.lmin > lo->repeatinfo.lmax)
    {
      gt_error_set(err,"argument of -minlenltr is greater than argument of"
                       " -maxlenltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->repeatinfo.dmin > lo->repeatinfo.dmax)
    {
      gt_error_set(err,
          "argument of -mindistltr is greater than argument of -maxdistltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->repeatinfo.lmax > lo->repeatinfo.dmin)
    {
      gt_error_set(err,"argument of -maxlenltr is greater than argument of"
                       " -mindistltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->minlengthTSD > lo->maxlengthTSD)
    {
      gt_error_set(err,
          "argument of -mintsd is greater than argument of -maxtsd");
      oprval = OPTIONPARSER_ERROR;
    }

    /* If option motif is set,
       store characters, transform them later */
    if (gt_option_is_set(optionmotif))
    {
      if (gt_str_length(lo->motif.str_motif) != 4UL)
      {
        gt_error_set(err,
            "argument of -motif has not exactly 4 characters");
        oprval = OPTIONPARSER_ERROR;
      }
      else
      {
        /* Only index the motif string once its length is known to be 4;
           indexing a shorter argument would read past its terminator. On
           the error path the default motif characters set above are kept. */
        const char *motifchars = gt_str_get(lo->motif.str_motif);

        lo->motif.firstleft   = (GtUchar) motifchars[0];
        lo->motif.secondleft  = (GtUchar) motifchars[1];
        lo->motif.firstright  = (GtUchar) motifchars[2];
        lo->motif.secondright = (GtUchar) motifchars[3];
      }
      /* default if motif specified */
      if (!gt_option_is_set(optionmotifmis))
      {
        lo->motif.allowedmismatches = 0;
      }
    }

    /* If option overlaps is set */
    if (gt_option_is_set(optionoverlaps))
    {
      if (strcmp(gt_str_get(lo->str_overlaps), "no") == 0)
      {
        lo->bestofoverlap = false;
        lo->nooverlapallowed = true;
      }
      else if (strcmp(gt_str_get(lo->str_overlaps), "best") == 0 )
      {
        lo->bestofoverlap = true;
        lo->nooverlapallowed = false;
      }
      else if (strcmp(gt_str_get(lo->str_overlaps), "all") == 0 )
      {
        lo->bestofoverlap = false;
        lo->nooverlapallowed = false;
      }
      else
      {
        gt_assert(0); /* cannot happen */
      }
    }
    else
    {
      /* default is "best" */
      lo->bestofoverlap = true;     /* take best prediction
                                       if overlap occurs, default */
      lo->nooverlapallowed = false; /* overlapping predictions (not)allowed*/
    }

    /* if FASTA output is set */
    if (gt_option_is_set(optionout))
    {
      lo->fastaoutput = true;
    }

    /* if FASTA output inner region is set */
    if (gt_option_is_set(optionoutinner))
    {
      lo->fastaoutputinnerregion = true;
    }

    /* if GFF3 output is set */
    if (gt_option_is_set(optiongff3))
    {
      lo->gff3output = true;
    }

    if (gt_option_is_set(optionltrsearchseqrange))
    {
      if (lo->repeatinfo.ltrsearchseqrange.start >
          lo->repeatinfo.ltrsearchseqrange.end)
      {
        gt_error_set(err,
            "arguments of -range: first arg must be <= than second arg");
        oprval = OPTIONPARSER_ERROR;
      }
    }
  }

  gt_option_parser_delete(op);
  return oprval;
}