예제 #1
0
// Parse the command line of fx_faidx into `options`.
//
// Registers the program meta data, the options and the help text sections with
// a seqan::ArgumentParser, runs the parser on argc/argv and, on success, copies
// the parsed values into `options`.
//
// options    -- Only written when parsing yields PARSE_OK.  inFastaPath is
//               always set, inFaiPath defaults to inFastaPath + ".fai" unless
//               --index-file is given; regions, outFastaPath and verbosity are
//               only written when the corresponding option is set.
// argc, argv -- The command line as passed to main().
//
// Returns the result of parse(); callers must check it for PARSE_OK before
// using `options`.
seqan::ArgumentParser::ParseResult
parseArgs(FxFaidxOptions & options,
          int argc,
          char const ** argv)
{
    // Program meta data.
    seqan::ArgumentParser parser("fx_faidx");
    setShortDescription(parser, "Indexing FASTA and indexed FASTA access.");
    setVersion(parser, "0.1");
    setDate(parser, "May 2012");

    addUsageLine(parser, "[\\fIOPTIONS\\fP] [\\fB-f\\fP \\fIFASTA\\fP] [\\fB-r\\fP \\fIREGION\\fP]+");
    addDescription(parser, "Equivalent program to samtools faidx.");

    // General options.
    // TODO(holtgrew): I want a custom help text!
    // addOption(parser, seqan::ArgParseOption("h", "help", "This helpful screen."));
    addOption(parser, seqan::ArgParseOption("v", "verbose", "Verbose, log to STDERR."));
    addOption(parser, seqan::ArgParseOption("vv", "very-verbose", "Very verbose, log to STDERR."));
    hideOption(parser, "very-verbose");

    // Input / output files.
    addSection(parser, "FASTA / FAIDX Files");
    addOption(parser, seqan::ArgParseOption("f", "fasta-file", "Path to the FASTA file.", seqan::ArgParseArgument::STRING, false, "FASTA"));
    setRequired(parser, "fasta-file");
    addOption(parser, seqan::ArgParseOption("i", "index-file", "Path to the .fai index file.  Defaults to FASTA.fai", seqan::ArgParseArgument::STRING, false, "FASTA"));
    addOption(parser, seqan::ArgParseOption("o", "out-file", "Path to the resulting file.  If omitted, result is printed to stdout.", seqan::ArgParseArgument::STRING, false, "FASTA"));

    // Regions; the option is a list so that -r can be given multiple times.
    addSection(parser, "Regions");
    addOption(parser, seqan::ArgParseOption("r", "region", "Region to retrieve from FASTA file.  You can specify multiple regions with multiple \\fB-r\\fP \\fIREGION\\fP.  Note that regions are one-based, see below for detailed information about the format.", seqan::ArgParseArgument::STRING, true, "REGION"));

    // Free text help sections.  The documented format uses "-" between START
    // and END so that it agrees with the examples listed below.
    addTextSection(parser, "Regions");
    addText(parser,
            "Regions can be specified in the formats \\fICHR\\fP, \\fICHR\\fP:\\fISTART\\fP, \\fICHR\\fP:\\fISTART\\fP-\\fIEND\\fP.  \\fICHR\\fP is the id of the reference sequence in the FASTA file, \\fISTART\\fP and \\fIEND\\fP are the start and end positions of the region.  These positions are one-based.");
    addTextSection(parser, "Region Examples");
    addListItem(parser, "\\fIchr1\\fP", "All of the sequence with the identifier \"chr1\".");
    addListItem(parser, "\\fIchrX\\fP:\\fI1,000\\fP", "The characters in the X chromosome, starting with the 1,000th base.");
    addListItem(parser, "\\fIchr2\\fP:\\fI1,500,000\\fP-\\fI2,000,000\\fP", "The character 1,500,000 up to and including character 2,000,000 of the sequence with the identifier \"chr2\".");

    addTextSection(parser, "Usage Examples");
    addListItem(parser, "\\fBfx_faidx\\fP \\fB-f\\fP \\fIREF.fa\\fP", "Create index for file \\fIREF.fa\\fP, index is written to \\fIREF.fa.fai\\fP");
    addListItem(parser, "\\fBfx_faidx\\fP \\fB-f\\fP \\fIREF.fa\\fP \\fB-i\\fP \\fIINDEX.fai\\fP", "Create index for file \\fIREF.fa\\fP, index is written to \\fIINDEX.fai\\fP");
    addListItem(parser, "\\fBfx_faidx\\fP \\fB-f\\fP \\fIREF.fa\\fP \\fB-r\\fP \\fIchr1\\fP", "Retrieve sequence named \"chr1\" from file \\fIREF.fa\\fP using the index with the default name \\fIREF.fa.fai\\fP.  The index file is created if it does not exist.");
    addListItem(parser, "\\fBfx_faidx\\fP \\fB-f\\fP \\fIREF.fa\\fP \\fB-r\\fP \\fIchr1:100-1100\\fP", "Retrieve characters 100 to 1,100 from the sequence named \"chr1\" from file \\fIREF.fa\\fP using the index with the default name \\fIREF.fa.fai\\fP.");
    addListItem(parser, "\\fBfx_faidx\\fP \\fB-f\\fP \\fIREF.fa\\fP \\fB-r\\fP \\fIchr1:100-1100\\fP \\fB-r\\fP \\fIchr2:2,000\\fP", "Retrieve characters 100-1,100 from \"chr1\" and all characters from 2,000 of \"chr2\".");

    seqan::ArgumentParser::ParseResult res = parse(parser, argc, argv);

    // Leave options untouched unless parsing succeeded (this also covers the
    // non-OK results produced for help/version requests).
    if (res != seqan::ArgumentParser::PARSE_OK)
        return res;

    getOptionValue(options.inFastaPath, parser, "fasta-file");

    // The FAI path defaults to "<FASTA>.fai" and is overridden by --index-file.
    options.inFaiPath = options.inFastaPath;
    append(options.inFaiPath, ".fai");
    if (isSet(parser, "index-file"))
        getOptionValue(options.inFaiPath, parser, "index-file");

    if (isSet(parser, "region"))
        options.regions = getOptionValues(parser, "region");

    if (isSet(parser, "out-file"))
        getOptionValue(options.outFastaPath, parser, "out-file");

    // --very-verbose is checked last so that it wins over --verbose.
    if (isSet(parser, "verbose"))
        options.verbosity = 2;
    if (isSet(parser, "very-verbose"))
        options.verbosity = 3;

    return res;
}
예제 #2
0
파일: sak.cpp 프로젝트: abadd0n/seqan
// Parse the command line of sak into `options`.
//
// Registers the program meta data, the single input file argument, all
// options and the help text sections with a seqan::ArgumentParser, runs the
// parser on argc/argv and, on success, copies the parsed values into `options`.
//
// options    -- Only written when parse() yields PARSE_OK.  If a later
//               conversion of a --sequence/--sequences/--infix value fails,
//               fields assigned before that point keep their new values.
// argc, argv -- The command line as passed to main().
//
// Returns the result of parse() if that is not PARSE_OK, PARSE_ERROR (after
// printing a message to stderr) if an index or range value cannot be
// converted, and PARSE_OK otherwise.
seqan::ArgumentParser::ParseResult
parseArgs(SakOptions & options,
          int argc,
          char ** argv)
{
    // Program meta data.
    seqan::ArgumentParser parser("sak");
    setShortDescription(parser, "Slicing and dicing of FASTA/FASTQ files.");
    setVersion(parser, SEQAN_APP_VERSION " [" SEQAN_REVISION "]");
    setDate(parser, SEQAN_DATE);
    setCategory(parser, "Utilities");

    addUsageLine(parser, "[\\fIOPTIONS\\fP] [\\fB-o\\fP \\fIOUT.{fa,fq}\\fP] \\fIIN.{fa,fq}\\fP");
    addDescription(parser, "\"It slices, it dices and it makes the laundry!\"");
    addDescription(parser, "Original SAK tool by David Weese. Rewrite by Manuel Holtgrewe.");

    // The only argument is the input file.
    addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUT_FILE, "IN"));

    // Only FASTA and FASTQ files are allowed as input.
    setValidValues(parser, 0, seqan::SeqFileIn::getFileExtensions());

    // TODO(holtgrew): I want a custom help text!
    // addOption(parser, seqan::ArgParseOption("h", "help", "This helpful screen."));
    addOption(parser, seqan::ArgParseOption("v", "verbose", "Verbose, log to STDERR."));
    hideOption(parser, "verbose");
    addOption(parser, seqan::ArgParseOption("vv", "very-verbose", "Very verbose, log to STDERR."));
    hideOption(parser, "very-verbose");

    addSection(parser, "Output Options");
    addOption(parser, seqan::ArgParseOption("o", "out-path",
                                            "Path to the resulting file.  If omitted, result is printed to stdout in FastQ format.",
                                            seqan::ArgParseOption::OUTPUT_FILE, "FASTX"));
    setValidValues(parser, "out-path", seqan::SeqFileOut::getFileExtensions());
    addOption(parser, seqan::ArgParseOption("rc", "revcomp", "Reverse-complement output."));
    addOption(parser, seqan::ArgParseOption("l", "max-length", "Maximal number of sequence characters to write out.",
                                            seqan::ArgParseOption::INTEGER, "LEN"));

    addSection(parser, "Filter Options");
    addOption(parser, seqan::ArgParseOption("s", "sequence", "Select the given sequence for extraction by 0-based index.",
                                            seqan::ArgParseOption::INTEGER, "NUM", true));
    addOption(parser, seqan::ArgParseOption("sn", "sequence-name", "Select sequence with name prefix being \\fINAME\\fP.",
                                            seqan::ArgParseOption::STRING, "NAME", true));
    addOption(parser, seqan::ArgParseOption("ss", "sequences",
                                            "Select sequences \\fIfrom\\fP-\\fIto\\fP where \\fIfrom\\fP and \\fIto\\fP "
                                            "are 0-based indices.",
                                            seqan::ArgParseArgument::STRING, "RANGE", true));
    addOption(parser, seqan::ArgParseOption("i", "infix",
                                            "Select characters \\fIfrom\\fP-\\fIto\\fP where \\fIfrom\\fP and \\fIto\\fP "
                                            "are 0-based indices.",
                                            seqan::ArgParseArgument::STRING, "RANGE", true));

    addOption(parser, seqan::ArgParseOption("ll", "line-length",
                                            "Set line length in output file.  See section \\fILine Length\\fP for details.",
                                            seqan::ArgParseArgument::INTEGER, "LEN", false));
    setMinValue(parser, "line-length", "-1");

    addTextSection(parser, "Line Length");
    addText(parser,
            "You can use the setting \\fB--line-length\\fP for setting the resulting line length.  By default, "
            "sequences in FASTA files are written with at most 70 characters per line and sequences in FASTQ files are "
            "written without any line breaks.  The quality sequence in FASTQ file is written in the same way as the "
            "residue sequence.");
    addText(parser,
            "The default is selected with a \\fB--line-length\\fP value of \\fI-1\\fP and line breaks can be disabled "
            "with a value of \\fI0\\fP.");

    addTextSection(parser, "Usage Examples");
    addListItem(parser, "\\fBsak\\fP \\fB-s\\fP \\fI10\\fP \\fIIN.fa\\fP",
                "Cut out 11th sequence from \\fIIN.fa\\fP and write to stdout as FASTA.");
    // NOTE(review): ordinals follow the mapping of the first range in this
    // example (10-12 -> 11th..12th); confirm against parseRange()'s semantics.
    addListItem(parser, "\\fBsak\\fP \\fB-ss\\fP \\fI10-12\\fP \\fB-ss\\fP \\fI100-200\\fP \\fIIN.fq\\fP",
                "Cut out 11th up to and including 12th and 101st up to and including 200th sequence from \\fIIN.fq\\fP "
                "and write to stdout as FASTA.");

    seqan::ArgumentParser::ParseResult res = parse(parser, argc, argv);

    // Leave options untouched unless parsing succeeded.
    if (res != seqan::ArgumentParser::PARSE_OK)
        return res;

    getArgumentValue(options.inFastxPath, parser, 0);

    if (isSet(parser, "out-path"))
        getOptionValue(options.outPath, parser, "out-path");

    // --very-verbose is checked last so that it wins over --verbose.
    if (isSet(parser, "verbose"))
        options.verbosity = 2;
    if (isSet(parser, "very-verbose"))
        options.verbosity = 3;

    // Each --sequence value is converted to an unsigned index.
    if (isSet(parser, "sequence"))
    {
        std::vector<std::string> sequenceIds = getOptionValues(parser, "sequence");
        for (unsigned i = 0; i < seqan::length(sequenceIds); ++i)
        {
            unsigned idx = 0;
            if (!seqan::lexicalCast(idx, sequenceIds[i]))
            {
                std::cerr << "ERROR: Invalid sequence index " << sequenceIds[i] << "\n";
                return seqan::ArgumentParser::PARSE_ERROR;
            }
            appendValue(options.seqIndices, idx);
        }
    }

    // Each --sequences value is a "from-to" range handed to parseRange().
    if (isSet(parser, "sequences"))
    {
        std::vector<std::string> sequenceRanges = getOptionValues(parser, "sequences");
        for (unsigned i = 0; i < seqan::length(sequenceRanges); ++i)
        {
            seqan::Pair<uint64_t> range;
            if (!parseRange(range.i1, range.i2, sequenceRanges[i]))
            {
                std::cerr << "ERROR: Invalid range " << sequenceRanges[i] << "\n";
                return seqan::ArgumentParser::PARSE_ERROR;
            }
            appendValue(options.seqIndexRanges, range);
        }
    }

    // Only the value returned by getOptionValue() is used for --infix.
    if (isSet(parser, "infix"))
    {
        seqan::CharString buffer;
        getOptionValue(buffer, parser, "infix");
        if (!parseRange(options.seqInfixBegin, options.seqInfixEnd, buffer))
        {
            std::cerr << "ERROR: Invalid range " << buffer << "\n";
            return seqan::ArgumentParser::PARSE_ERROR;
        }
    }

    options.reverseComplement = isSet(parser, "revcomp");

    if (isSet(parser, "max-length"))
        getOptionValue(options.maxLength, parser, "max-length");

    if (isSet(parser, "sequence-name"))
        getOptionValue(options.readPattern, parser, "sequence-name");

    // Queried unconditionally, exactly as before; no default is registered
    // for --line-length in this function.
    getOptionValue(options.seqOutOptions.lineLength, parser, "line-length");

    return res;
}