Example #1
0
// Map the sort order string reported by the header onto a SortedType.
//   "queryname"  -> QUERY_NAME
//   "coordinate" -> COORDINATE
//   anything else (including no recognized value) -> UNSORTED
SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
{
    const char* sortOrderText = header.getSortOrder();

    if(strcmp(sortOrderText, "queryname") == 0)
    {
        return(QUERY_NAME);
    }
    if(strcmp(sortOrderText, "coordinate") == 0)
    {
        return(COORDINATE);
    }

    // Neither recognized value matched; unsorted is the value to use when
    // the header does not specify one of the above.
    return(UNSORTED);
}
Example #2
0
int Bam2FastQ::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool readName = false;
    String refFile = "";
    String outBase = "";
    String firstOut = "";
    String secondOut = "";
    String unpairedOut = "";

    bool interleave = false;
    bool noeof = false;
    bool params = false;

    myNumMateFailures = 0;
    myNumPairs = 0;
    myNumUnpaired = 0;
    myReverseComp = true;
    myRNPlus = false;
    myFirstRNExt = DEFAULT_FIRST_EXT;
    mySecondRNExt = DEFAULT_SECOND_EXT;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("readName", &readName)
        LONG_PARAMETER("merge", &interleave)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt)
        LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt)
        LONG_PARAMETER("rnPlus", &myRNPlus)
        LONG_PARAMETER("noReverseComp", &myReverseComp)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional OutputFile Names")
        LONG_STRINGPARAMETER("outBase", &outBase)
        LONG_STRINGPARAMETER("firstOut", &firstOut)
        LONG_STRINGPARAMETER("secondOut", &secondOut)
        LONG_STRINGPARAMETER("unpairedOut", &unpairedOut)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }


    // Check to see if the out file was specified, if not, generate it from
    // the input filename.
    if(outBase == "")
    {
        // Just remove the extension from the input filename.
        int extStart = inFile.FastFindLastChar('.');
        if(extStart <= 0)
        {
            outBase = inFile;
        }
        else
        {
            outBase = inFile.Left(extStart);
        }
    }

    // Check to see if the first/second/single-ended were specified and
    // if not, set them.
    std::string firstExt = "_1.fastq";
    if(interleave)
    {
        firstExt = "_interleaved.fastq";
    }
    getFileName(firstOut, outBase, firstExt.c_str());
    getFileName(secondOut, outBase, "_2.fastq");
    getFileName(unpairedOut, outBase, ".fastq");

    if(params)
    {
        inputParameters.Status();
    }

    // Open the files for reading/writing.
    // Open prior to opening the output files,
    // so if there is an error, the outputs don't get created.
    SamFile samIn;
    SamFileHeader samHeader;
    samIn.OpenForRead(inFile, &samHeader);

    // Open the output files.
    myUnpairedFile = ifopen(unpairedOut, "w");

    // Only open the first file if it is different than an already opened file.
    if(firstOut != unpairedOut)
    {
        myFirstFile = ifopen(firstOut, "w");
    }
    else
    {
        myFirstFile = myUnpairedFile;
    }

    // If it is interleaved or the 2nd file is not a new name, set it appropriately.
    if(interleave || secondOut == firstOut)
    {
        mySecondFile = myFirstFile;
    }
    else if(secondOut == unpairedOut)
    {
        mySecondFile = myUnpairedFile;
    }
    else
    {
        mySecondFile = ifopen(secondOut, "w");
    }
    
    if(myUnpairedFile == NULL)
    {
        std::cerr << "Failed to open " << unpairedOut
                  << " so can't convert bam2FastQ.\n";
        return(-1);
    }
    if(myFirstFile == NULL)
    {
        std::cerr << "Failed to open " << firstOut
                  << " so can't convert bam2FastQ.\n";
        return(-1);
    }
    if(mySecondFile == NULL)
    {
        std::cerr << "Failed to open " << secondOut
                  << " so can't convert bam2FastQ.\n";
        return(-1);
    }

    if((readName) || (strcmp(samHeader.getSortOrder(), "queryname") == 0))
    {
        readName = true;
    }
    else
    {
        // defaulting to coordinate sorted.
        samIn.setSortedValidation(SamFile::COORDINATE);
    }

    // Setup the '=' translation if the reference was specified.
    if(!refFile.IsEmpty())
    {
        GenomeSequence* refPtr = new GenomeSequence(refFile);
        samIn.SetReadSequenceTranslation(SamRecord::BASES);
        samIn.SetReference(refPtr);
    }

    SamRecord* recordPtr;
    int16_t samFlag;

    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    while(returnStatus == SamStatus::SUCCESS)
    {
        recordPtr = myPool.getRecord();
        if(recordPtr == NULL)
        {
            // Failed to allocate a new record.
            throw(std::runtime_error("Failed to allocate a new SAM/BAM record"));
        }
        if(!samIn.ReadRecord(samHeader, *recordPtr))
        {
            // Failed to read a record.
            returnStatus = samIn.GetStatus();
            continue;
        }

        // Have a record.  Check to see if it is a pair or unpaired read.
        samFlag = recordPtr->getFlag();
        if(SamFlag::isPaired(samFlag))
        {
            if(readName)
            {
                handlePairedRN(*recordPtr);
            }
            else
            {
                handlePairedCoord(*recordPtr);
            }
        }
        else
        {
            ++myNumUnpaired;
            writeFastQ(*recordPtr, myUnpairedFile);
        }
    }

    // Flush All
    cleanUpMateMap(0, true);

    if(returnStatus == SamStatus::NO_MORE_RECS)
    {
        returnStatus = SamStatus::SUCCESS;
    }

    samIn.Close();
    closeFiles();
    
    // Output the results
    std::cerr << "\nFound " << myNumPairs << " read pairs.\n";
    std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n";
    if(myNumMateFailures != 0)
    {
        std::cerr << "Failed to find mates for " << myNumMateFailures
                  << " reads, so they were written as unpaired\n"
                  << "  (not included in either of the above counts).\n";
    }

    return(returnStatus);
}