Ejemplo n.º 1
0
static GtOptionParser* gt_encseq_check_option_parser_new(void *tool_arguments)
{
  GtEncseqCheckArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GT_UNUSED GtOption *option;
  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] [file]",
                            "Check the consistency of an encoded "
                            "sequence file.");

  option = gt_option_new_ulong("scantrials", "specify number of scan trials",
                               &arguments->scantrials, 0);
  gt_option_parser_add_option(op, option);

  option = gt_option_new_ulong("multicharcmptrials",
                               "specify number of multicharacter trials",
                               &arguments->multicharcmptrials, 0);
  gt_option_parser_add_option(op, option);

  option = gt_option_new_ulong_min_max("prefixlength",
                                       "prefix length",
                                       &arguments->prefixlength, 0,
                                       0, MAXPREFIXLENGTH);
  gt_option_parser_add_option(op, option);

  gt_option_parser_set_min_args(op, 1);

  return op;
}
Ejemplo n.º 2
0
static GtOptionParser* gt_tir_option_parser_new(void *tool_arguments)
{
  GtTirArguments *arguments = tool_arguments;
  GtOptionParser *op;
  GtOption *optionindex,      /* index */
           *optionseed,       /* minseedlength */
           *optionminlentir,  /* minimal length of TIR */
           *optionmaxlentir,  /* maximal length of TIR */
           *optionmindisttir, /* minimal distance of TIRs */
           *optionmaxdisttir, /* maximal distance of TIRs */
           *optionmat,        /* arbitrary scores */
           *optionmis,
           *optionins,
           *optiondel,
           *optionxdrop,      /* xdropbelowscore for extension alignment */
           *optionsimilar,    /* similarity threshold */
           *optionoverlaps,   /* for overlaps */
           *optionmintsd,     /* minimal length for Target Site Duplication */
           *optionmaxtsd,     /* maximal length for Target Site Duplication */
           *optionvicinity;   /* vicinity around TIRs to be searched for TSDs */
    static const char *overlaps[] = {
    "best", /* default */
    "no",
    "all",
    NULL
  };

  gt_assert(arguments);

  /* init */
  op = gt_option_parser_new("[option ...] -index INDEXNAME",
                            "Identify Terminal Inverted Repeat (TIR) elements,"
                            "such as DNA transposons.");

  /* -index */
  optionindex = gt_option_new_string("index",
                                     "specify the name of the enhanced suffix "
                                     "array index (mandatory)",
                                     arguments->str_indexname, NULL);
  gt_option_is_mandatory(optionindex);
  gt_option_parser_add_option(op, optionindex);

   /* -seed */
  optionseed = gt_option_new_ulong_min("seed",
                                       "specify minimum seed length for "
                                       "exact repeats",
                                       &arguments->min_seed_length, 20UL, 2UL);
  gt_option_parser_add_option(op, optionseed);

  /* -minlentir */
  optionminlentir = gt_option_new_ulong_min_max("mintirlen",
                                                "specify minimum length for "
                                                "each TIR",
                                                &arguments->min_TIR_length,
                                                27UL, 1UL, GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionminlentir);

  /* -maxlentir */
  optionmaxlentir = gt_option_new_ulong_min_max("maxtirlen",
                                                "specify maximum length for "
                                                "each TIR",
                                                &arguments->max_TIR_length,
                                                1000UL, 1UL, GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxlentir);

  /* -mindisttir */
  optionmindisttir = gt_option_new_ulong_min_max("mintirdist",
                                                 "specify minimum distance of "
                                                 "TIRs",
                                                 &arguments->min_TIR_distance,
                                                 100UL, 1UL, GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmindisttir);

  /* -maxdisttir */
  optionmaxdisttir = gt_option_new_ulong_min_max("maxtirdist",
                                                 "specify maximum distance of "
                                                 "TIRs",
                                                 &arguments->max_TIR_distance,
                                                 10000UL, 1UL, GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxdisttir);

  optionmat = gt_option_new_int_min("mat",
                                    "specify matchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mat, 2, 1);
  gt_option_parser_add_option(op, optionmat);

  /* -mis */
  optionmis = gt_option_new_int_max("mis",
                                    "specify mismatchscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.mis, -2, -1);
  gt_option_parser_add_option(op, optionmis);

  /* -ins */
  optionins = gt_option_new_int_max("ins",
                                    "specify insertionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.ins, -3, -1);
  gt_option_parser_add_option(op, optionins);

  /* -del */
  optiondel = gt_option_new_int_max("del",
                                    "specify deletionscore for "
                                    "extension-alignment",
                                    &arguments->arbit_scores.del, -3, -1);
  gt_option_parser_add_option(op, optiondel);

  /* -xdrop */
  optionxdrop = gt_option_new_int_min("xdrop",
                                      "specify xdropbelowscore for "
                                      "extension-alignment",
                                      &arguments->xdrop_belowscore, (int) 5,
                                      (int) 0);
  gt_option_parser_add_option(op, optionxdrop);

  /* -similar */
  optionsimilar = gt_option_new_double_min_max("similar",
                                               "specify similaritythreshold in "
                                               "range [1..100%]",
                                               &arguments->similarity_threshold,
                                               (double) 85.0, (double) 0.0,
                                               100.0);
  gt_option_parser_add_option(op, optionsimilar);

  /* -overlaps */
  optionoverlaps = gt_option_new_choice("overlaps", "specify no|best|all",
                                        arguments->str_overlaps,
                                        overlaps[0], overlaps);
  gt_option_parser_add_option(op, optionoverlaps);
  arguments->optionoverlaps = gt_option_ref(optionoverlaps);

  /* -mintsd */
  optionmintsd = gt_option_new_ulong_min_max("mintsd",
                                             "specify minimum length for each "
                                             "TSD",
                                             &arguments->min_TSD_length,
                                             2U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmintsd);

  /* -maxtsd */
  optionmaxtsd = gt_option_new_ulong_min_max("maxtsd",
                                             "specify maximum length for each "
                                             "TSD",
                                             &arguments->max_TSD_length,
                                             11U, 0, GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmaxtsd);
  gt_option_imply(optionmaxtsd, optionmintsd);

  /* -vicinity */
  optionvicinity = gt_option_new_ulong_min_max("vic",
                                               "specify the number of "
                                               "nucleotides (to the left and "
                                               "to the right) that will be "
                                               "searched for TSDs around 5' "
                                               "and 3' boundary of predicted "
                                               "TIRs",
                                               &arguments->vicinity,
                                               60U, 1U, 500U);
  gt_option_parser_add_option(op, optionvicinity);

  return op;
}
Ejemplo n.º 3
0
static OPrval parse_options(int *parsed_args,
                            LTRharvestoptions *lo,
                            int argc, const char **argv, GtError *err)
{
  GtOptionParser *op;
  GtOption *optionindex,
         *optionltrsearchseqrange,
         *optionseed,
         *optionminlenltr,
         *optionmaxlenltr,
         *optionmindistltr,
         *optionmaxdistltr,
         *optionmintsd,
         *optionmaxtsd,
         *optionsimilar,
         *optionmotif,
         *optionmotifmis,
         *optionvic,
         *optionoverlaps,
         *optionxdrop,
         *optionmat,
         *optionmis,
         *optionins,
         *optiondel,
         *optionv,
         *optionoffset,
         *optionlongoutput,
         *optionout,
         *optionoutinner,
         *optiongff3;
  OPrval oprval;
  GtRange default_ltrsearchseqrange = {0,0};
  unsigned int vicinityforcorrectboundaries;

  static const char *overlaps[] = {
    "best", /* the default */
    "no",
    "all",
    NULL
  };

  gt_error_check(err);
  op = gt_option_parser_new("[option ...] -index filenameindex",
                         "Predict LTR retrotransposons.");

  /* -index */
  lo->str_indexname = gt_str_new();
  optionindex = gt_option_new_string("index",
                             "specify the name of the enhanced suffix "
                             "array index (mandatory)",
                             lo->str_indexname, NULL);
  gt_option_is_mandatory(optionindex);
  gt_option_parser_add_option(op, optionindex);

  /* -range */
  optionltrsearchseqrange
    = gt_option_new_range("range",
                          "specify sequence range in which LTRs are searched",
                          &lo->repeatinfo.ltrsearchseqrange,
                          &default_ltrsearchseqrange);
  gt_option_parser_add_option(op, optionltrsearchseqrange);

  /* -seed */
  optionseed = gt_option_new_ulong_min("seed",
                               "specify minimum seed length for"
                               " exact repeats",
                               &lo->minseedlength,
                               30UL,
                               1UL);
  gt_option_parser_add_option(op, optionseed);

  /* -minlenltr */
  optionminlenltr = gt_option_new_ulong_min_max("minlenltr",
                               "specify minimum length for each LTR",
                               &lo->repeatinfo.lmin,
                               100UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionminlenltr);

  /* -maxlenltr */
  optionmaxlenltr = gt_option_new_ulong_min_max("maxlenltr",
                               "specify maximum length for each LTR",
                               &lo->repeatinfo.lmax,
                               1000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxlenltr);

  /* -mindistltr */
  optionmindistltr = gt_option_new_ulong_min_max("mindistltr",
                               "specify minimum distance of "
                               "LTR startpositions",
                               &lo->repeatinfo.dmin,
                               1000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmindistltr);

  /* -maxdistltr */
  optionmaxdistltr = gt_option_new_ulong_min_max("maxdistltr",
                               "specify maximum distance of "
                               "LTR startpositions",
                               &lo->repeatinfo.dmax,
                               15000UL,
                               1UL,
                               GT_UNDEF_ULONG);
  gt_option_parser_add_option(op, optionmaxdistltr);

  /* -similar */
  optionsimilar = gt_option_new_double_min_max("similar",
                               "specify similaritythreshold in "
                               "range [1..100%]",
                               &lo->similaritythreshold,
                               (double) 85.0,
                               (double) 0.0,
                               100.0);
  gt_option_parser_add_option(op, optionsimilar);

  /* -mintsd */
  optionmintsd = gt_option_new_uint_min_max("mintsd",
                              "specify minimum length for each TSD",
                               &lo->minlengthTSD,
                               4U,
                               0,
                               GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmintsd);

  /* -maxtsd */
  optionmaxtsd = gt_option_new_uint_min_max("maxtsd",
                              "specify maximum length for each TSD",
                               &lo->maxlengthTSD,
                               20U,
                               0,
                               GT_UNDEF_UINT);
  gt_option_parser_add_option(op, optionmaxtsd);

  /* -motif */
  /* characters will be tranformed later
     into characters from virtualtree alphabet */
  lo->motif.firstleft   = (GtUchar) 't';
  lo->motif.secondleft  = (GtUchar) 'g';
  lo->motif.firstright  = (GtUchar) 'c';
  lo->motif.secondright = (GtUchar) 'a';
  lo->motif.str_motif = gt_str_new();
  optionmotif = gt_option_new_string("motif",
                             "specify 2 nucleotides startmotif + "
                             "2 nucleotides endmotif: ****",
                             lo->motif.str_motif, NULL);
  gt_option_parser_add_option(op, optionmotif);

  /* -motifmis */
  optionmotifmis = gt_option_new_uint_min_max("motifmis",
                             "specify maximum number of "
                             "mismatches in motif [0,3]",
                             &lo->motif.allowedmismatches,
                             4U,
                             0,
                             3U);
  gt_option_parser_add_option(op, optionmotifmis);

  /* -vic */
  optionvic = gt_option_new_uint_min_max("vic",
                        "specify the number of nucleotides (to the left and "
                        "to the right) that will be searched "
                        "for TSDs and/or motifs around 5' and 3' boundary "
                        "of predicted LTR retrotransposons",
                        &vicinityforcorrectboundaries,
                        60U,
                        1U,
                        500U);
  gt_option_parser_add_option(op, optionvic);

  /* -overlaps */
  lo->str_overlaps = gt_str_new();
  optionoverlaps = gt_option_new_choice("overlaps",
               "specify no|best|all",
               lo->str_overlaps,
               overlaps[0],
               overlaps);
  gt_option_parser_add_option(op, optionoverlaps);

  /* -xdrop */
  optionxdrop = gt_option_new_int_min("xdrop",
                        "specify xdropbelowscore for extension-alignment",
                        &lo->xdropbelowscore,
                        (int)5,
                        (int)0);
  gt_option_parser_add_option(op, optionxdrop);

  /* -mat */
  lo->arbitscores.gcd  = 1;      /* set only for initialization,
                                        do not change! */
  optionmat = gt_option_new_int_min("mat",
                        "specify matchscore for extension-alignment",
                        &lo->arbitscores.mat,
                        2,
                        1);
  gt_option_parser_add_option(op, optionmat);

  /* -mis */
  optionmis = gt_option_new_int_max("mis",
                        "specify mismatchscore for extension-alignment",
                        &lo->arbitscores.mis,
                        -2,
                        -1);
  gt_option_parser_add_option(op, optionmis);

  /* -ins */
  optionins = gt_option_new_int_max("ins",
                        "specify insertionscore for extension-alignment",
                        &lo->arbitscores.ins,
                        -3,
                        -1);
  gt_option_parser_add_option(op, optionins);

  /* -del */
  optiondel = gt_option_new_int_max("del",
                        "specify deletionscore for extension-alignment",
                        &lo->arbitscores.del,
                        -3,
                        -1);
  gt_option_parser_add_option(op, optiondel);

  /* -v */
  optionv = gt_option_new_bool("v",
                           "verbose mode",
                           &lo->verbosemode,
                           false);
  gt_option_parser_add_option(op, optionv);

  /* -longoutput */
  optionlongoutput = gt_option_new_bool("longoutput",
                           "additional motif/TSD output",
                           &lo->longoutput,
                           false);
  gt_option_parser_add_option(op, optionlongoutput);

  /* -out */
  lo->fastaoutput = false;      /* by default no FASTA output */
  lo->str_fastaoutputfilename = gt_str_new();
  optionout = gt_option_new_string("out",
                             "specify FASTA outputfilename",
                             lo->str_fastaoutputfilename, NULL);
  gt_option_parser_add_option(op, optionout);

  /* -outinner */
  lo->fastaoutputinnerregion = false;
  lo->str_fastaoutputfilenameinnerregion = gt_str_new();
  optionoutinner = gt_option_new_string("outinner",
                             "specify FASTA outputfilename for inner regions",
                             lo->str_fastaoutputfilenameinnerregion, NULL);
  gt_option_parser_add_option(op, optionoutinner);

  /* -gff3 */
  lo->gff3output = false;       /* by default no gff3 output */
  lo->str_gff3filename = gt_str_new();
  optiongff3 = gt_option_new_string("gff3",
                             "specify GFF3 outputfilename",
                             lo->str_gff3filename, NULL);
  gt_option_parser_add_option(op, optiongff3);

  /* -offset */
  optionoffset = gt_option_new_ulong("offset",
                                     "offset added to GFF3 coordinates",
                                     &lo->offset,
                                     0UL);
  gt_option_parser_add_option(op, optionoffset);
  gt_option_is_extended_option(optionoffset);

  /* implications */
  gt_option_imply(optionmaxtsd, optionmintsd);
  gt_option_imply(optionmotifmis, optionmotif);

  gt_option_imply_either_2(optionlongoutput, optionmintsd, optionmotif);

  gt_option_parser_refer_to_manual(op);
  oprval = gt_option_parser_parse(op, parsed_args, argc, argv, gt_versionfunc,
                                  err);
  lo->vicinityforcorrectboundaries = (Seqpos) vicinityforcorrectboundaries;
  if (oprval == OPTIONPARSER_OK)
  {
    if (lo->repeatinfo.lmin > lo->repeatinfo.lmax)
    {
      gt_error_set(err,"argument of -minlenltr is greater than argument of"
          " -maxlenltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->repeatinfo.dmin > lo->repeatinfo.dmax)
    {
      gt_error_set(err,
          "argument of -mindistltr is greater than argument of -maxdistltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->repeatinfo.lmax > lo->repeatinfo.dmin)
    {
      gt_error_set(err,"argument of -maxlenltr is greater than argument of"
                    " -mindistltr");
      oprval = OPTIONPARSER_ERROR;
    }
    if (lo->minlengthTSD > lo->maxlengthTSD)
    {
      gt_error_set(err,
          "argument of -mintsd is greater than argument of -maxtsd");
      oprval = OPTIONPARSER_ERROR;
    }

    /* If option motif is set,
       store characters, transform them later */
    if (gt_option_is_set(optionmotif))
    {
      if (gt_str_length(lo->motif.str_motif) != 4UL)
      {
        gt_error_set(err,
            "argument of -motif has not exactly 4 characters");
        oprval = OPTIONPARSER_ERROR;
      }
      lo->motif.firstleft = (GtUchar)  gt_str_get(lo->motif.str_motif)[0];
      lo->motif.secondleft = (GtUchar)  gt_str_get(lo->motif.str_motif)[1];
      lo->motif.firstright = (GtUchar)  gt_str_get(lo->motif.str_motif)[2];
      lo->motif.secondright = (GtUchar)  gt_str_get(lo->motif.str_motif)[3];
      /* default if motif specified */
      if (!gt_option_is_set(optionmotifmis))
      {
        lo->motif.allowedmismatches = 0;
      }
    }

    /* If option overlaps is set */
    if (gt_option_is_set(optionoverlaps))
    {
      if (strcmp(gt_str_get(lo->str_overlaps), "no") == 0)
      {
        lo->bestofoverlap = false;
        lo->nooverlapallowed = true;
      }
      else if (strcmp(gt_str_get(lo->str_overlaps), "best") == 0 )
      {
        lo->bestofoverlap = true;
        lo->nooverlapallowed = false;
      }
      else if (strcmp(gt_str_get(lo->str_overlaps), "all") == 0 )
      {
        lo->bestofoverlap = false;
        lo->nooverlapallowed = false;
      }
      else
      {
        gt_assert(0); /* cannot happen */
      }
    }
    else
    {
      /* default is "best" */
      lo->bestofoverlap = true;     /* take best prediction
                                       if overlap occurs, default */
      lo->nooverlapallowed = false; /* overlapping predictions (not)allowed*/
    }

    /* if FASTA output is set */
    if (gt_option_is_set(optionout))
    {
      lo->fastaoutput = true;
    }

    /* if FASTA output inner region is set */
    if (gt_option_is_set(optionoutinner))
    {
      lo->fastaoutputinnerregion = true;
    }

    /* if GFF3 output is set */
    if (gt_option_is_set(optiongff3))
    {
      lo->gff3output = true;
    }

    if (gt_option_is_set(optionltrsearchseqrange))
    {
      if (lo->repeatinfo.ltrsearchseqrange.start >
          lo->repeatinfo.ltrsearchseqrange.end)
      {
        gt_error_set(err,
            "arguments of -range: first arg must be <= than second arg");
        oprval = OPTIONPARSER_ERROR;
      }
    }
  }

  gt_option_parser_delete(op);
  return oprval;
}