Ejemplo n.º 1
0
SamStatus::Status ClipOverlap::readCoordRecord(SamFile& samIn,
                                               SamRecord** recordPtr, 
                                               MateMapByCoord& mateMap,
                                               SamCoordOutput* outputBufferPtr)
{
    // Null pointer, so get a new pointer.
    if(*recordPtr == NULL)
    {
        *recordPtr = myPool.getRecord();
        if(*recordPtr == NULL)
        {
            // Failed to allocate a new record.
            // Try to free up records from the mate map
            if(!forceRecordFlush(mateMap, outputBufferPtr))
            {
                std::cerr << "Failed to flush the output buffer.\n";
                return(SamStatus::FAIL_IO);
            }
            // Try to get a new record, one should have been cleared.
            *recordPtr = myPool.getRecord();
            if(*recordPtr == NULL)
            {
                std::cerr << "Failed to allocate any records.\n";
                return(SamStatus::FAIL_MEM);
            }
        }
    }

    // RecordPtr is set.
    if(!samIn.ReadRecord(mySamHeader, **recordPtr))
    {
        // Nothing to process, so return.
        return(samIn.GetStatus());
    }
    return(SamStatus::SUCCESS);
}
Ejemplo n.º 2
0
int Dedup_LowMem::execute(int argc, char** argv)
{
    /* --------------------------------
     * process the arguments
     * -------------------------------*/
    String inFile, outFile, logFile;
    myDoRecab = false;
    bool removeFlag = false;
    bool verboseFlag = false;
    myForceFlag = false;
    myNumMissingMate = 0;
    myMinQual = DEFAULT_MIN_QUAL;
    String excludeFlags = "0xB04";
    uint16_t intExcludeFlags = 0;
    bool noeof = false;
    bool params = false;

    LongParamContainer parameters;
    parameters.addGroup("Required Parameters");
    parameters.addString("in", &inFile);
    parameters.addString("out", &outFile);
    parameters.addGroup("Optional Parameters");
    parameters.addInt("minQual", & myMinQual);
    parameters.addString("log", &logFile);
    parameters.addBool("oneChrom", &myOneChrom);
    parameters.addBool("recab", &myDoRecab);
    parameters.addBool("rmDups", &removeFlag);
    parameters.addBool("force", &myForceFlag);
    parameters.addString("excludeFlags", &excludeFlags);
    parameters.addBool("verbose", &verboseFlag);
    parameters.addBool("noeof", &noeof);
    parameters.addBool("params", &params);
    parameters.addPhoneHome(VERSION);
    myRecab.addRecabSpecificParameters(parameters);

    ParameterList inputParameters;
    inputParameters.Add(new LongParameters ("Input Parameters",
                                            parameters.getLongParameterList()));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    if(inFile.IsEmpty())
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "Specify an input file" << std::endl;
        return EXIT_FAILURE;
    }

    if(outFile.IsEmpty())
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "Specify an output file" << std::endl;
        return EXIT_FAILURE;
    }

    intExcludeFlags = excludeFlags.AsInteger();

    if(myForceFlag && SamFlag::isDuplicate(intExcludeFlags))
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "Cannot specify --force and Duplicate in the excludeFlags.  Since --force indicates to override"
                  << " previous duplicate setting and the excludeFlags says to skip those, you can't do both.\n";
        return EXIT_FAILURE;
    }

    if(!SamFlag::isSecondary(intExcludeFlags))
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "ERROR: Secondary reads must be excluded, edit --excludeFlags to include 0x0100\n";
        return EXIT_FAILURE;
    }

    if(!(intExcludeFlags & SamFlag::SUPPLEMENTARY_ALIGNMENT))
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "ERROR: Supplementary reads must be excluded, edit --excludeFlags to include 0x0800\n";
        return EXIT_FAILURE;
    }

    if(logFile.IsEmpty())
    {
        logFile = outFile + ".log";
    }

    if(myDoRecab)
    {
        int status = myRecab.processRecabParam();
        if(status != 0)
        {
            inputParameters.Status();
            return(status);
        }
    }

    if(params)
    {
        inputParameters.Status();
    }

    Logger::gLogger = new Logger(logFile.c_str(), verboseFlag);

    /* -------------------------------------------------------------------
     * The arguments are processed.  Prepare the input BAM file,
     * instantiate dedup_LowMem, and construct the read group library map
     * ------------------------------------------------------------------*/

    SamFile samIn;

    samIn.OpenForRead(inFile.c_str());
    // If the file isn't sorted it will throw an exception.
    samIn.setSortedValidation(SamFile::COORDINATE);

    SamFileHeader header;
    samIn.ReadHeader(header);

    buildReadGroupLibraryMap(header);

    lastReference = -1;
    lastCoordinate = -1;

    // for keeping some basic statistics
    uint32_t recordCount = 0;
    uint32_t pairedCount = 0;
    uint32_t properPairCount = 0;
    uint32_t unmappedCount = 0;
    uint32_t reverseCount = 0;
    uint32_t qualCheckFailCount = 0;
    uint32_t secondaryCount = 0;
    uint32_t supplementaryCount = 0;
    uint32_t excludedCount = 0;

    // Now we start reading records
    SamRecord* recordPtr;
    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    while(returnStatus == SamStatus::SUCCESS)
    {
        recordPtr = mySamPool.getRecord();
        if(recordPtr == NULL)
        {
            std::cerr << "Failed to allocate enough records\n";
            return(-1);
        }
        if(!samIn.ReadRecord(header, *recordPtr))
        {
            returnStatus = samIn.GetStatus();
            continue;
        }
        // Take note of properties of this record
        int flag = recordPtr->getFlag();
        if(SamFlag::isPaired(flag))     ++pairedCount;
        if(SamFlag::isProperPair(flag)) ++properPairCount;
        if(SamFlag::isReverse(flag))    ++reverseCount;
        if(SamFlag::isQCFailure(flag))  ++qualCheckFailCount;
        if(SamFlag::isSecondary(flag))  ++secondaryCount;
        if(flag & SamFlag::SUPPLEMENTARY_ALIGNMENT)  ++supplementaryCount;
        if(!SamFlag::isMapped(flag))    ++unmappedCount;

        // put the record in the appropriate maps:
        //   single reads go in myFragmentMap
        //   paired reads go in myPairedMap
        recordCount = samIn.GetCurrentRecordCount();

        // if we have moved to a new position, look back at previous reads for duplicates
        if (hasPositionChanged(*recordPtr))
        {
            cleanupPriorReads(recordPtr);
        }

        // Determine if this read should be checked for duplicates.
        if((!SamFlag::isMapped(flag)) || ((flag & intExcludeFlags) != 0))
        {
            ++excludedCount;

            // No deduping done on this record, but still build the recab table.
            if(myDoRecab)
            {
                myRecab.processReadBuildTable(*recordPtr);
            }
            // Nothing more to do with this record, so
            // release the pointer.
            mySamPool.releaseRecord(recordPtr);
        }
        else
        {
            if(SamFlag::isDuplicate(flag) && !myForceFlag)
            {
                // Error: Marked duplicates, and duplicates aren't excluded.
                Logger::gLogger->error("There are records already duplicate marked.");
                Logger::gLogger->error("Use -f to clear the duplicate flag and start the dedup_LowMem procedure over");
            }

            checkDups(*recordPtr, recordCount);
            mySamPool.releaseRecord(recordPtr);
        }
        // let the user know we're not napping
        if (verboseFlag && (recordCount % 100000 == 0))
        {
            Logger::gLogger->writeLog("recordCount=%u singleKeyMap=%u pairedKeyMap=%u, dictSize=%u",
                                      recordCount, myFragmentMap.size(),
                                      myPairedMap.size(),
                                      myMateMap.size());
        }
    }

    // we're finished reading record so clean up the duplicate search and
    //  close the input file
    cleanupPriorReads(NULL);
    samIn.Close();

    // print some statistics
    Logger::gLogger->writeLog("--------------------------------------------------------------------------");
    Logger::gLogger->writeLog("SUMMARY STATISTICS OF THE READS");
    Logger::gLogger->writeLog("Total number of reads: %u",recordCount);
    Logger::gLogger->writeLog("Total number of paired-end reads: %u",
                              pairedCount);
    Logger::gLogger->writeLog("Total number of properly paired reads: %u",
                              properPairCount);
    Logger::gLogger->writeLog("Total number of unmapped reads: %u",
                              unmappedCount);
    Logger::gLogger->writeLog("Total number of reverse strand mapped reads: %u",
                              reverseCount);
    Logger::gLogger->writeLog("Total number of QC-failed reads: %u",
                              qualCheckFailCount);
    Logger::gLogger->writeLog("Total number of secondary reads: %u",
                              secondaryCount);
    Logger::gLogger->writeLog("Total number of supplementary reads: %u",
                              supplementaryCount);
    Logger::gLogger->writeLog("Size of singleKeyMap (must be zero): %u",
                              myFragmentMap.size());
    Logger::gLogger->writeLog("Size of pairedKeyMap (must be zero): %u",
                              myPairedMap.size());
    Logger::gLogger->writeLog("Total number of missing mates: %u",
                              myNumMissingMate);
    Logger::gLogger->writeLog("Total number of reads excluded from duplicate checking: %u",
                              excludedCount);
    Logger::gLogger->writeLog("--------------------------------------------------------------------------");
    Logger::gLogger->writeLog("Sorting the indices of %d duplicated records",
                              myDupList.size());

    // sort the indices of duplicate records
    std::sort(myDupList.begin(), myDupList.end(),
              std::less<uint32_t> ());

    // get ready to write the output file by making a second pass
    // through the input file
    samIn.OpenForRead(inFile.c_str());
    samIn.ReadHeader(header);

    SamFile samOut;
    samOut.OpenForWrite(outFile.c_str());
    samOut.WriteHeader(header);

    // If we are recalibrating, output the model information.
    if(myDoRecab)
    {
        myRecab.modelFitPrediction(outFile);
    }

    // an iterator to run through the duplicate indices
    int currentDupIndex = 0;
    bool moreDups = !myDupList.empty();

    // let the user know what we're doing
    Logger::gLogger->writeLog("\nWriting %s", outFile.c_str());

    // count the duplicate records as a check
    uint32_t singleDuplicates(0), pairedDuplicates(0);

    // start reading records and writing them out
    SamRecord record;
    while(samIn.ReadRecord(header, record))
    {
        uint32_t currentIndex = samIn.GetCurrentRecordCount();

        bool foundDup = moreDups &&
                        (currentIndex == myDupList[currentDupIndex]);

        // modify the duplicate flag and write out the record,
        // if it's appropriate
        int flag = record.getFlag();
        if (foundDup)
        {
            // this record is a duplicate, so mark it.
            record.setFlag( flag | 0x400 );
            currentDupIndex++;
            // increment duplicate counters to verify we found them all
            if ( ( ( flag & 0x0001 ) == 0 ) || ( flag & 0x0008 ) )
            {   // unpaired or mate unmapped
                singleDuplicates++;
            }
            else
            {
                pairedDuplicates++;
            }
            // recalibrate if necessary.
            if(myDoRecab)
            {
                myRecab.processReadApplyTable(record);
            }

            // write the record if we are not removing duplicates
            if (!removeFlag ) samOut.WriteRecord(header, record);
        }
        else
        {
            if(myForceFlag)
            {
                // this is not a duplicate we've identified but we want to
                // remove any duplicate marking
                record.setFlag( flag & 0xfffffbff ); // unmark duplicate
            }
            // Not a duplicate, so recalibrate if necessary.
            if(myDoRecab)
            {
                myRecab.processReadApplyTable(record);
            }
            samOut.WriteRecord(header, record);
        }

        // Let the user know we're still here
        if (verboseFlag && (currentIndex % 100000 == 0)) {
            Logger::gLogger->writeLog("recordCount=%u", currentIndex);
        }
    }

    // We're done.  Close the files and print triumphant messages.
    samIn.Close();
    samOut.Close();

    Logger::gLogger->writeLog("Successfully %s %u unpaired and %u paired duplicate reads",
                              removeFlag ? "removed" : "marked" ,
                              singleDuplicates,
                              pairedDuplicates/2);
    Logger::gLogger->writeLog("\nDedup_LowMem complete!");
    return 0;
}
Ejemplo n.º 3
0
int Revert::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String outFile = "";
    bool cigar = false;
    bool qual = false;
    bool noeof = false;
    bool params = false;
    bool rmBQ = false;
    String rmTags = "";
    myKeepTags = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_STRINGPARAMETER("out", &outFile)
        LONG_PARAMETER("cigar", &cigar)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("keepTags", &myKeepTags)
        LONG_PARAMETER("rmBQ", &rmBQ)
        LONG_STRINGPARAMETER("rmTags", &rmTags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }
    
    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(outFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--out is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    samIn.OpenForRead(inFile);

    // Open the output file for writing.
    SamFile samOut;
    samOut.OpenForWrite(outFile);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    // Write the sam header.
    samOut.WriteHeader(samHeader);

    SamRecord samRecord;

    // Set returnStatus to success.  It will be changed to the
    // failure reason if any of the writes or updates fail.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(samHeader, samRecord))
    {
        // Update the cigar & position.
        if(cigar)
        {
            if(!updateCigar(samRecord))
            {
                // Failed to update the cigar & position.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }
        if(qual)
        {
            if(!updateQual(samRecord))
            {
                // Failed to update the quality.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        if(rmBQ)
        {
            if(!removeBQ(samRecord))
            {
                // Failed to remove BQ.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        if(rmTags != "")
        {
            if(!samRecord.rmTags(rmTags.c_str()))
            {
                // Failed to remove the specified tags.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        // Successfully read a record from the file, so write it.
        if(!samOut.WriteRecord(samHeader, samRecord))
        {
            // Failed to write a record.
            fprintf(stderr, "%s\n", samOut.GetStatusMessage());
            returnStatus = samOut.GetStatus();
        }
    }

    std::cerr << std::endl << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;
    std::cerr << "Number of records written = " << 
        samOut.GetCurrentRecordCount() << std::endl;

    // Since the reads were successful, return the status based
    // on the status of the writes.  If any failed, return
    // their failure status.
    return(returnStatus);
}
Ejemplo n.º 4
0
int Convert::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String outFile = "";
    String refFile = "";
    bool lshift = false;
    bool noeof = false;
    bool params = false;

    bool useBases = false;
    bool useEquals = false;
    bool useOrigSeq = false;

    bool recover = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_STRINGPARAMETER("out", &outFile)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_PARAMETER("lshift", &lshift)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("recover", &recover)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("SequenceConversion")
            EXCLUSIVE_PARAMETER("useBases", &useBases)
            EXCLUSIVE_PARAMETER("useEquals", &useEquals)
            EXCLUSIVE_PARAMETER("useOrigSeq", &useOrigSeq)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }
    
    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(outFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--out is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Check to see if the ref file was specified.
    // Open the reference.
    GenomeSequence* refPtr = NULL;
    if(refFile != "")
    {
        refPtr = new GenomeSequence(refFile);
    }

    SamRecord::SequenceTranslation translation;
    if((useBases) && (refPtr != NULL))
    {
        translation = SamRecord::BASES;
    }
    else if((useEquals) && (refPtr != NULL))
    {
        translation = SamRecord::EQUAL;
    }
    else
    {
        useOrigSeq = true;
        translation = SamRecord::NONE;
    }
    
    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    if(recover) samIn.setAttemptRecovery(true);
    samIn.OpenForRead(inFile);

    // Open the output file for writing.
    SamFile samOut;
    samOut.OpenForWrite(outFile);
    samOut.SetWriteSequenceTranslation(translation);
    samOut.SetReference(refPtr);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    // Write the sam header.
    samOut.WriteHeader(samHeader);

    SamRecord samRecord;

    // Set returnStatus to success.  It will be changed
    // to the failure reason if any of the writes fail.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    while(1) {
        try {
            // Keep reading records until ReadRecord returns false.
            while(samIn.ReadRecord(samHeader, samRecord))
            {
                // left shift if necessary.
                if(lshift)
                {
                    samRecord.shiftIndelsLeft();
                }

                // Successfully read a record from the file, so write it.
                if(!samOut.WriteRecord(samHeader, samRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOut.GetStatusMessage());
                    returnStatus = samOut.GetStatus();
                }
            }
            break;
        } catch (std::runtime_error e) {
            std::cerr << "Caught runtime error: " << e.what() << "\n";
            if(!recover) {
                std::cerr << "Corrupted BAM file detected - consider using --recover option.\n";
                break;
            }
            std::cerr << "Attempting to resync at next good BGZF block and BAM record.\n";
            // XXX need to resync SamFile stream here
            bool rc = samIn.attemptRecoverySync(checkSignature, SIGNATURE_LENGTH);
            if(rc) {
                std::cerr << "Successful resync - some data lost.\n";
                continue;    // succeeded
            }
            std::cerr << "Failed to re-sync on data stream.\n";
            break;              // failed to resync
        }
    }

    std::cerr << std::endl << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;
    std::cerr << "Number of records written = " << 
        samOut.GetCurrentRecordCount() << std::endl;

    if(refPtr != NULL)
    {
        delete(refPtr);
    }

    // Since the reads were successful, return the status based
    // on the status of the writes.  If any failed, return
    // their failure status.
    return(returnStatus);
}
Ejemplo n.º 5
0
int Bam2FastQ::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool readName = false;
    String refFile = "";
    String firstOut = "";
    String secondOut = "";
    String unpairedOut = "";

    bool interleave = false;
    bool noeof = false;
    bool gzip = false;
    bool params = false;

    myOutBase = "";
    myNumMateFailures = 0;
    myNumPairs = 0;
    myNumUnpaired = 0;
    mySplitRG = false;
    myQField = "";
    myNumQualTagErrors = 0;
    myReverseComp = true;
    myRNPlus = false;
    myFirstRNExt = DEFAULT_FIRST_EXT;
    mySecondRNExt = DEFAULT_SECOND_EXT;
    myCompression = InputFile::DEFAULT;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("readName", &readName)
        LONG_PARAMETER("splitRG", &mySplitRG)
        LONG_STRINGPARAMETER("qualField", &myQField)
        LONG_PARAMETER("merge", &interleave)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt)
        LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt)
        LONG_PARAMETER("rnPlus", &myRNPlus)
        LONG_PARAMETER("noReverseComp", &myReverseComp)
        LONG_PARAMETER("gzip", &gzip)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional OutputFile Names")
        LONG_STRINGPARAMETER("outBase", &myOutBase)
        LONG_STRINGPARAMETER("firstOut", &firstOut)
        LONG_STRINGPARAMETER("secondOut", &secondOut)
        LONG_STRINGPARAMETER("unpairedOut", &unpairedOut)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    if(gzip)
    {
        myCompression = InputFile::GZIP;
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both splitRG & firstOut/secondOut/unpairedOut
    // since it needs a different file for each RG.
    if(mySplitRG && (!firstOut.IsEmpty() || 
                   !secondOut.IsEmpty() || !unpairedOut.IsEmpty()))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & --firstOut/--secondOut/--unpairedOut.\n";
        std::cerr << "Use --outBase instead.\n";
        return(-1);
    }
    // Cannot specify splitRG & output to stdout.
    if(mySplitRG && (myOutBase[0] == '-'))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & write to stdout.\n";
        return(-1);
    }

    // Check to see if the out file was specified, if not, generate it from
    // the input filename.
    if(myOutBase == "")
    {
        // Just remove the extension from the input filename.
        int extStart = inFile.FastFindLastChar('.');
        if(extStart <= 0)
        {
            myOutBase = inFile;
        }
        else
        {
            myOutBase = inFile.Left(extStart);
        }
    }

    if(mySplitRG)
    {
        std::string fqList = myOutBase.c_str();
        fqList += ".list";
        myFqList = ifopen(fqList.c_str(), "w");
        ifprintf(myFqList, "MERGE_NAME\tFASTQ1\tFASTQ2\tRG\n");
    }

    // Check to see if the first/second/single-ended were specified and
    // if not, set them.
    myFirstFileNameExt = "_1.fastq";
    mySecondFileNameExt = "_2.fastq";
    myUnpairedFileNameExt = ".fastq";
    if(interleave)
    {
        myFirstFileNameExt = "_interleaved.fastq";
        myFirstFileNameExt = "_interleaved.fastq";
    }
    getFileName(firstOut, myFirstFileNameExt);
    getFileName(secondOut, mySecondFileNameExt);
    getFileName(unpairedOut, myUnpairedFileNameExt);

    if(params)
    {
        inputParameters.Status();
    }

    // Open the files for reading/writing.
    // Open prior to opening the output files,
    // so if there is an error, the outputs don't get created.
    SamFile samIn;
    samIn.OpenForRead(inFile, &mySamHeader);
    // Skip non-primary reads.
    samIn.SetReadFlags(0, 0x0100);

    // Open the output files if not splitting RG
    if(!mySplitRG)
    {
        myUnpairedFile = ifopen(unpairedOut, "w", myCompression);

        // Only open the first file if it is different than an already opened file.
        if(firstOut != unpairedOut)
        {
            myFirstFile = ifopen(firstOut, "w", myCompression);
        }
        else
        {
            myFirstFile = myUnpairedFile;
        }

        // If it is interleaved or the 2nd file is not a new name, set it appropriately.
        if(interleave || secondOut == firstOut)
        {
            mySecondFile = myFirstFile;
        }
        else if(secondOut == unpairedOut)
        {
            mySecondFile = myUnpairedFile;
        }
        else
        {
            mySecondFile = ifopen(secondOut, "w", myCompression);
        }
    
        if(myUnpairedFile == NULL)
        {
            std::cerr << "Failed to open " << unpairedOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(myFirstFile == NULL)
        {
            std::cerr << "Failed to open " << firstOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(mySecondFile == NULL)
        {
            std::cerr << "Failed to open " << secondOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
    }

    if((readName) || (strcmp(mySamHeader.getSortOrder(), "queryname") == 0))
    {
        readName = true;
    }
    else
    {
        // defaulting to coordinate sorted.
        samIn.setSortedValidation(SamFile::COORDINATE);
    }

    // Setup the '=' translation if the reference was specified.
    if(!refFile.IsEmpty())
    {
        GenomeSequence* refPtr = new GenomeSequence(refFile);
        samIn.SetReadSequenceTranslation(SamRecord::BASES);
        samIn.SetReference(refPtr);
    }

    SamRecord* recordPtr;
    int16_t samFlag;

    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    while(returnStatus == SamStatus::SUCCESS)
    {
        recordPtr = myPool.getRecord();
        if(recordPtr == NULL)
        {
            // Failed to allocate a new record.
            throw(std::runtime_error("Failed to allocate a new SAM/BAM record"));
        }
        if(!samIn.ReadRecord(mySamHeader, *recordPtr))
        {
            // Failed to read a record.
            returnStatus = samIn.GetStatus();
            continue;
        }

        // Have a record.  Check to see if it is a pair or unpaired read.
        samFlag = recordPtr->getFlag();
        if(SamFlag::isPaired(samFlag))
        {
            if(readName)
            {
                handlePairedRN(*recordPtr);
            }
            else
            {
                handlePairedCoord(*recordPtr);
            }
        }
        else
        {
            ++myNumUnpaired;
            writeFastQ(*recordPtr, myUnpairedFile,
                       myUnpairedFileNameExt);
        }
    }

    // Flush All
    cleanUpMateMap(0, true);

    if(returnStatus == SamStatus::NO_MORE_RECS)
    {
        returnStatus = SamStatus::SUCCESS;
    }

    samIn.Close();
    closeFiles();
    
    // Output the results
    std::cerr << "\nFound " << myNumPairs << " read pairs.\n";
    std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n";
    if(myNumMateFailures != 0)
    {
        std::cerr << "Failed to find mates for " << myNumMateFailures
                  << " reads, so they were written as unpaired\n"
                  << "  (not included in either of the above counts).\n";
    }
    if(myNumQualTagErrors != 0)
    {
        std::cerr << myNumQualTagErrors << " records did not have tag "
                  << myQField.c_str() << " or it was invalid, so the quality field was used for those records.\n";
    }

    return(returnStatus);
}
Ejemplo n.º 6
0
int main(int argc, char ** argv)
{
  gpLogger = new Logger;

  static struct option getopt_long_options[] = 
    {
      // Input options
      { "fasta", required_argument, NULL, 'f'},
      { "in", required_argument, NULL, 'i'},
      { "out", required_argument, NULL, 'o'},
      { "verbose", no_argument, NULL, 'v'},
      { "log", required_argument, NULL, 'l'},
      { "clear", no_argument, NULL, 0},
      { "AS", required_argument, NULL, 0},
      { "UR", required_argument, NULL, 0},
      { "SP", required_argument, NULL, 0},
      { "HD", required_argument, NULL, 0},
      { "RG", required_argument, NULL, 0},
      { "PG", required_argument, NULL, 0},
      { "checkSQ", no_argument, NULL, 0},
      { NULL, 0, NULL, 0 },
    };

  int n_option_index = 0, c;
  
  std::string sAS, sUR, sSP, sFasta, sInFile, sOutFile, sLogFile;
  bool bClear, bCheckSQ, bVerbose;
  std::vector<std::string> vsHDHeaders, vsRGHeaders, vsPGHeaders;

  bCheckSQ = bVerbose = false;
  bClear = true;

  while ( (c = getopt_long(argc, argv, "vf:i:o:l:", getopt_long_options, &n_option_index)) != -1 ) {
      //    std::cout << getopt_long_options[n_option_index].name << "\t" << optarg << std::endl;
    if ( c == 'f' ) {
      sFasta = optarg;
    }
    else if ( c == 'i' ) {
      sInFile = optarg;
    }
    else if ( c == 'o' ) {
      sOutFile = optarg;
    }
    else if ( c == 'v' ) {
      bVerbose = true;
    }
    else if ( c == 'l' ) {
	sLogFile = optarg;
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"AS") == 0 ) {
      sAS = optarg;
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"UR") == 0 ) {
      sUR = optarg;
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"SP") == 0 ) {
      sSP = optarg;
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"HD") == 0 ) {
      vsHDHeaders.push_back(optarg);
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"RG") == 0 ) {
      vsRGHeaders.push_back(optarg);
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"PG") == 0 ) {
      vsPGHeaders.push_back(optarg);
    }
    else if ( strcmp(getopt_long_options[n_option_index].name,"checkSQ") == 0 ) {
      bCheckSQ = true;
    }
    else {
      std::cerr << "Error: Unrecognized option " << getopt_long_options[n_option_index].name << std::endl;
      abort();
    }
  }

  if ( optind < argc ) {
    printUsage(std::cerr);
    gpLogger->error("non-option argument %s exist ",argv[optind]);
  }

  if ( sInFile.empty() || sOutFile.empty() ) {
    printUsage(std::cerr);
    gpLogger->error("Input and output files are required");
  }

  if ( sLogFile.compare("__NONE__") == 0 ) {
    sLogFile = (sOutFile + ".log");
  }

  gpLogger->open(sLogFile.c_str(), bVerbose);

  if ( ( bCheckSQ ) && ( sFasta.empty() ) ) {
    printUsage(std::cerr);
    gpLogger->error("--checkSQ option must be used with --fasta option");
  }

  // check whether each header line starts with a correct tag
  checkHeaderStarts(vsHDHeaders, "@HD\t");
  checkHeaderStarts(vsRGHeaders, "@RG\t");
  checkHeaderStarts(vsPGHeaders, "@PG\t");

  gpLogger->write_log("Arguments in effect:");
  gpLogger->write_log("\t--in [%s]",sInFile.c_str());
  gpLogger->write_log("\t--out [%s]",sOutFile.c_str());
  gpLogger->write_log("\t--log [%s]",sLogFile.c_str());
  gpLogger->write_log("\t--fasta [%s]",sFasta.c_str());
  gpLogger->write_log("\t--AS [%s]",sAS.c_str());
  gpLogger->write_log("\t--UR [%s]",sUR.c_str());
  gpLogger->write_log("\t--SP [%s]",sSP.c_str());
  gpLogger->write_log("\t--checkSQ [%s]",bClear ? "ON" : "OFF" );
  if ( vsHDHeaders.empty() ) {
    gpLogger->write_log("\t--HD []");
  }
  else {
    gpLogger->write_log("\t--HD [%s]",vsHDHeaders[0].c_str());
  }
  if ( vsRGHeaders.empty() ) {
    gpLogger->write_log("\t--RG []");
  }
  else {
    gpLogger->write_log("\t--RG [%s]",vsRGHeaders[0].c_str());
  }
  if ( vsPGHeaders.empty() ) {
    gpLogger->write_log("\t--PG []");
  }
  else {
    for(uint32_t i=0; i < vsPGHeaders.size(); ++i) {
      gpLogger->write_log("\t--PG [%s]",vsPGHeaders[i].c_str());
    }
  }

  if ( (vsHDHeaders.empty() ) && ( vsRGHeaders.empty() ) && ( vsPGHeaders.empty() ) && ( !bClear ) && ( sFasta.empty() ) ) {
    gpLogger->warning("No option is in effect for modifying BAM files. The input and output files will be identical");
  }

  if ( ( vsHDHeaders.size() > 1 ) || ( vsRGHeaders.size() > 1 ) ) {
    gpLogger->error("HD and RG headers cannot be multiple");
  }

  FastaFile fastaFile;
  if ( ! sFasta.empty() ) {
    if ( fastaFile.open(sFasta.c_str()) ) {
      gpLogger->write_log("Reading the reference file %s",sFasta.c_str());
      fastaFile.readThru();
      fastaFile.close();
      gpLogger->write_log("Finished reading the reference file %s",sFasta.c_str());      
    }
    else {
      gpLogger->error("Failed to open reference file %s",sFasta.c_str());
    }
  }

  SamFile samIn;
  SamFile samOut;

  if ( ! samIn.OpenForRead(sInFile.c_str()) ) {
    gpLogger->error("Cannot open BAM file %s for reading - %s",sInFile.c_str(), SamStatus::getStatusString(samIn.GetStatus()) );
  }
  if ( ! samOut.OpenForWrite(sOutFile.c_str()) ) {
    gpLogger->error("Cannot open BAM file %s for writing - %s",sOutFile.c_str(), SamStatus::getStatusString(samOut.GetStatus()) );
  }

  SamFileHeader samHeader;
  SamHeaderRecord* pSamHeaderRecord;
  samIn.ReadHeader(samHeader);

  // check the sanity of SQ file
  // make sure the SN and LN matches, with the same order
  if ( bCheckSQ ) {
    unsigned int numSQ = 0;
    while( (pSamHeaderRecord = samHeader.getNextHeaderRecord()) != NULL ) {
      if ( pSamHeaderRecord->getType() == SamHeaderRecord::SQ ) {
	++numSQ;
      }
    }

    if ( numSQ != fastaFile.vsSequenceNames.size() ) {
      gpLogger->error("# of @SQ tags are different from the original BAM and the reference file");
    }

    // iterator over all @SQ objects
    for(unsigned int i=0; i < numSQ; ++i) {
      pSamHeaderRecord = samHeader.getSQ(fastaFile.vsSequenceNames[i].c_str());
      if ( fastaFile.vsSequenceNames[i].compare(pSamHeaderRecord->getTagValue("SN")) != 0 ) {
	gpLogger->error("SequenceName is not identical between fasta and input BAM file");
      }
      else if ( static_cast<int>(fastaFile.vnSequenceLengths[i]) != atoi(pSamHeaderRecord->getTagValue("LN")) ) {
	gpLogger->error("SequenceLength is not identical between fasta and input BAM file");
      }
      else {
	if ( !sAS.empty() ) 
	  samHeader.setSQTag("AS",sAS.c_str(),fastaFile.vsSequenceNames[i].c_str());
	samHeader.setSQTag("M5",fastaFile.vsMD5sums[i].c_str(),fastaFile.vsSequenceNames[i].c_str());
	if ( !sUR.empty() ) 
	  samHeader.setSQTag("UR",sUR.c_str(),fastaFile.vsSequenceNames[i].c_str());
	if ( !sSP.empty() ) 
	  samHeader.setSQTag("SP",sSP.c_str(),fastaFile.vsSequenceNames[i].c_str());
      }
    }
    gpLogger->write_log("Finished checking the consistency of SQ tags");
  }
  else {
    gpLogger->write_log("Skipped checking the consistency of SQ tags");
  }

  // go over the headers again, 
  // assuming order of HD, SQ, RG, PG, and put proper tags at the end of the original tags

  gpLogger->write_log("Creating the header of new output file");
  //SamFileHeader outHeader;
  samHeader.resetHeaderRecordIter();

  for(unsigned int i=0; i < vsHDHeaders.size(); ++i) {
    samHeader.addHeaderLine(vsHDHeaders[i].c_str());
  }

  /*
  for(int i=0; i < fastaFile.vsSequenceNames.size(); ++i) {
    std::string s("@SQ\tSN:");
    char buf[1024];
    s += fastaFile.vsSequenceNames[i];
    sprintf(buf,"\tLN:%d",fastaFile.vnSequenceLengths[i]);
    s += buf;
    if ( !sAS.empty() ) {
      sprintf(buf,"\tAS:%s",sAS.c_str());
      s += buf;
    }
    if ( !sUR.empty() ) {
      sprintf(buf,"\tUR:%s",sUR.c_str());
      s += buf;
    }
    sprintf(buf,"\tM5:%s",fastaFile.vsMD5sums[i].c_str());
    s += buf;
    if ( !sSP.empty() ) {
      sprintf(buf,"\tSP:%s",sSP.c_str());
      s += buf;
    }
    outHeader.addHeaderLine(s.c_str());
    }*/

  for(unsigned int i=0; i < vsRGHeaders.size(); ++i) {
    samHeader.addHeaderLine(vsRGHeaders[i].c_str());
  }

  for(unsigned int i=0; i < vsPGHeaders.size(); ++i) {
    samHeader.addHeaderLine(vsPGHeaders[i].c_str());
  }

  samOut.WriteHeader(samHeader);
  gpLogger->write_log("Adding %d HD, %d RG, and %d PG headers",vsHDHeaders.size(), vsRGHeaders.size(), vsPGHeaders.size());
  gpLogger->write_log("Finished writing output headers");

  // parse RG tag and get RG ID to append
  std::string sRGID;
  if ( ! vsRGHeaders.empty() ) {
    std::vector<std::string> tokens;
    FastaFile::tokenizeString( vsRGHeaders[0].c_str(), tokens );
    for(unsigned int i=0; i < tokens.size(); ++i) {
      if ( tokens[i].find("ID:") == 0 ) {
	sRGID = tokens[i].substr(3);
      }
    }
  }
  
  gpLogger->write_log("Writing output BAM file");
  SamRecord samRecord;
  while (samIn.ReadRecord(samHeader, samRecord) == true) {
    if ( !sRGID.empty() ) {
      if ( samRecord.addTag("RG",'Z',sRGID.c_str()) == false ) {
	gpLogger->error("Failed to add a RG tag %s",sRGID.c_str());
      }
      // temporary code added
      if ( strncmp(samRecord.getReadName(),"seqcore_",8) == 0 ) {
	char buf[1024];
	sprintf(buf,"UM%s",samRecord.getReadName()+8);
	samRecord.setReadName(buf);
      }
    }
    samOut.WriteRecord(samHeader, samRecord);
    //if ( samIn.GetCurrentRecordCount() == 1000 ) break;
  }
  samOut.Close();
  gpLogger->write_log("Successfully written %d records",samIn.GetCurrentRecordCount());
  delete gpLogger;
  return 0;
}
Ejemplo n.º 7
0
SamStatus::Status ClipOverlap::handleSortedByReadName(SamFile& samIn, 
                                                      SamFile* samOutPtr)
{
    // Set returnStatus to success.  It will be changed
    // to the failure reason if any of the writes fail.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    // Read the sam records.
    SamRecord* prevSamRecord = NULL;
    SamRecord* samRecord = new SamRecord;
    SamRecord* tmpRecord = new SamRecord;
    if((samRecord == NULL) || (tmpRecord == NULL))
    {
        std::cerr << "Failed to allocate a SamRecord, so exit.\n";
        return(SamStatus::FAIL_MEM);
    }

    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(mySamHeader, *samRecord))
    {
        int16_t flag = samRecord->getFlag();
        if((flag & myIntExcludeFlags) != 0)
        {
            // This read should not be checked for overlaps.

            // Check if there is a previous SamRecord.
            if(prevSamRecord != NULL)
            {
                // There is a previous record.
                // If it has a different read name, write it.
                if(strcmp(samRecord->getReadName(), 
                          prevSamRecord->getReadName()) != 0)
                {
                    // Different read name, so write the previous record.
                    if((samOutPtr != NULL) && !myOverlapsOnly)
                    {
                        if(!samOutPtr->WriteRecord(mySamHeader, *prevSamRecord))
                        {
                            // Failed to write a record.
                            fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
                            returnStatus = samOutPtr->GetStatus();
                        }
                    }
                    // Clear the previous record info.
                    tmpRecord = prevSamRecord;
                    prevSamRecord = NULL;
                } 
                // If it has the same read name, leave it in case there is another read with that name
            }
            // This record is not being checked for overlaps, so just write it and continue
            if((samOutPtr != NULL) && !myOverlapsOnly)
            {
                if(!samOutPtr->WriteRecord(mySamHeader, *samRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
                    returnStatus = samOutPtr->GetStatus();
                }
            }
            continue;
        }

        if(prevSamRecord == NULL)
        {
            // Nothing to compare this record to, so set this
            // record to the previous, and the next record.
            prevSamRecord = samRecord;
            samRecord = tmpRecord;
            tmpRecord = NULL;
            continue;
        }

        // Check if the read name matches the previous read name.
        if(strcmp(samRecord->getReadName(), 
                  prevSamRecord->getReadName()) == 0)
        {
            bool overlap = false;
            // Same Read Name, so check clipping.
            OverlapHandler::OverlapInfo prevClipInfo = 
                myOverlapHandler->getOverlapInfo(*prevSamRecord);
            OverlapHandler::OverlapInfo curClipInfo = 
                myOverlapHandler->getOverlapInfo(*samRecord);
            
            // If either indicate a complete clipping, clip both.
            if((prevClipInfo == OverlapHandler::NO_OVERLAP_WRONG_ORIENT) ||
               (curClipInfo == OverlapHandler::NO_OVERLAP_WRONG_ORIENT))
            {
                overlap = true;
                myOverlapHandler->handleNoOverlapWrongOrientation(*prevSamRecord);
                // Don't update stats since this is the 2nd in the pair
                myOverlapHandler->handleNoOverlapWrongOrientation(*samRecord, 
                                                                  false);
            }
            else if((prevClipInfo == OverlapHandler::OVERLAP) ||
                    (prevClipInfo == OverlapHandler::SAME_START))
            {
                // The previous read starts at or before the current one.
                overlap = true;
                myOverlapHandler->handleOverlapPair(*prevSamRecord,
                                                    *samRecord);
            }
            else if(curClipInfo == OverlapHandler::OVERLAP)
            {
                // The current read starts before the previous one.
                overlap = true;
                myOverlapHandler->handleOverlapPair(*samRecord,
                                                    *prevSamRecord);
            }
            
            // Found a read pair, so write both records if: 
            //   1) output file is specified
            //   AND
            //     2a) all records should be written
            //     OR
            //     2b) the pair overlaps
            if((samOutPtr != NULL) && (!myOverlapsOnly || overlap))
            {
                if(!samOutPtr->WriteRecord(mySamHeader, *prevSamRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
                    returnStatus = samOutPtr->GetStatus();
                }
                if(!samOutPtr->WriteRecord(mySamHeader, *samRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
                    returnStatus = samOutPtr->GetStatus();
                }
            }
            // Setup for the next read with no previous.
            tmpRecord = prevSamRecord;
            prevSamRecord = NULL;
            
        }
        else
        {
            // Read name does not match, so write the previous record
            // if we are writing all records.
            if((samOutPtr != NULL) && !myOverlapsOnly)
            {
                if(!samOutPtr->WriteRecord(mySamHeader, *prevSamRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
                    returnStatus = samOutPtr->GetStatus();
                }
            }
            // Store this record as the previous.
            tmpRecord = prevSamRecord;
            prevSamRecord = samRecord;
            samRecord = tmpRecord;
            tmpRecord = NULL;
        }
    }

    // Write the previous record if there is one.
    if((samOutPtr != NULL) && (prevSamRecord != NULL) && !myOverlapsOnly)
    {
        if(!samOutPtr->WriteRecord(mySamHeader, *prevSamRecord))
        {
            // Failed to write a record.
            fprintf(stderr, "%s\n", samOutPtr->GetStatusMessage());
            returnStatus = samOutPtr->GetStatus();
        }
        delete prevSamRecord;
    }

    if(samRecord != NULL)
    {
        delete samRecord;
    }
    if(tmpRecord != NULL)
    {
        delete tmpRecord;
    }

    if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
    {
        return(samIn.GetStatus());
    }
    return(returnStatus);
}
Ejemplo n.º 8
0
int Stats::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String indexFile = "";
    bool basic = false;
    bool noeof = false;
    bool params = false;
    bool qual = false;
    bool phred = false;
    int maxNumReads = -1;
    bool unmapped = false;
    String pBaseQC = "";
    String cBaseQC = "";
    String regionList = "";
    int excludeFlags = 0;
    int requiredFlags = 0;
    bool withinRegion = false;
    int minMapQual = 0;
    String dbsnp = "";
    PosList *dbsnpListPtr = NULL;
    bool baseSum = false;
    int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Types of Statistics")
        LONG_PARAMETER("basic", &basic)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("phred", &phred)
        LONG_STRINGPARAMETER("pBaseQC", &pBaseQC)
        LONG_STRINGPARAMETER("cBaseQC", &cBaseQC)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_INTPARAMETER("maxNumReads", &maxNumReads)
        LONG_PARAMETER("unmapped", &unmapped)
        LONG_STRINGPARAMETER("bamIndex", &indexFile)
        LONG_STRINGPARAMETER("regionList", &regionList)
        LONG_INTPARAMETER("excludeFlags", &excludeFlags)
        LONG_INTPARAMETER("requiredFlags", &requiredFlags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters")
        LONG_PARAMETER("withinRegion", &withinRegion)
        LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters")
        LONG_PARAMETER("baseSum", &baseSum)
        LONG_INTPARAMETER("bufferSize", &bufferSize)
        LONG_INTPARAMETER("minMapQual", &minMapQual)
        LONG_STRINGPARAMETER("dbsnp", &dbsnp)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    inputParameters.Read(argc-1, &(argv[1]));

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument for stats, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Use the index file if unmapped or regionList is not empty.
    bool useIndex = (unmapped|| (!regionList.IsEmpty()));

    // IndexFile is required, so check to see if it has been set.
    if(useIndex && (indexFile == ""))
    {
        // In file was not specified, so set it to the in file
        // + ".bai"
        indexFile = inFile + ".bai";
    }
    ////////////////////////////////////////
    // Setup in case pileup is used.
    Pileup<PileupElementBaseQCStats> pileup(bufferSize);
    // Initialize start/end positions.
    myStartPos = 0;
    myEndPos = -1;
    
    // Open the output qc file if applicable.
    IFILE baseQCPtr = NULL;
    if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty())
    {
        usage();
        inputParameters.Status();
        // Cannot specify both types of baseQC.
        std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl;
        return(-1);
    }
    else if(!pBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(pBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(true);
    }
    else if(!cBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(cBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(false);
    }

    if(baseQCPtr != NULL)
    {
        PileupElementBaseQCStats::setOutputFile(baseQCPtr);
        PileupElementBaseQCStats::printHeader();
    }
    if((baseQCPtr != NULL) || baseSum)
    {
        PileupElementBaseQCStats::setMapQualFilter(minMapQual);
        PileupElementBaseQCStats::setBaseSum(baseSum);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the file for reading.
    SamFile samIn;
    if(!samIn.OpenForRead(inFile))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    samIn.SetReadFlags(requiredFlags, excludeFlags);

    // Set whether or not basic statistics should be generated.
    samIn.GenerateStatistics(basic);

    // Read the sam header.
    SamFileHeader samHeader;
    if(!samIn.ReadHeader(samHeader))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    // Open the bam index file for reading if we are
    // doing unmapped reads (also set the read section).
    if(useIndex)
    {
        samIn.ReadBamIndex(indexFile);

        if(unmapped)
        {
            samIn.SetReadSection(-1);
        }

        if(!regionList.IsEmpty())
        {
            myRegionList = ifopen(regionList, "r");
        }
    }

    //////////////////////////
    // Read dbsnp if specified and doing baseQC
    if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty()))
    {
        // Read the dbsnp file.
        IFILE fdbSnp;
        fdbSnp = ifopen(dbsnp,"r");
        // Determine how many entries.
        const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
        int maxRefLen = 0;
        for(int i = 0; i < refInfo.getNumEntries(); i++)
        {
            int refLen = refInfo.getReferenceLength(i);
            if(refLen >= maxRefLen)
            {
                maxRefLen = refLen + 1;
            }
        }
        
        dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen);

        if(fdbSnp==NULL)
        {
            std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n";
        }
        else if(dbsnpListPtr == NULL)
        {
            std::cerr << "Failed to init the memory allocation for the dbsnpList.\n";
        }
        else
        {
            // Read the dbsnp file.
            StringArray tokens;
            String buffer;
            int position = 0;
            int refID = 0;

            // Loop til the end of the file.
            while (!ifeof(fdbSnp))
            {
                // Read the next line.
                buffer.ReadLine(fdbSnp);
                // If it does not have at least 2 columns, 
                // continue to the next line.
                if (buffer.IsEmpty() || buffer[0] == '#') continue;
                tokens.AddTokens(buffer);
                if(tokens.Length() < 2) continue;

                if(!tokens[1].AsInteger(position))
                {
                    std::cerr << "Improperly formatted region line, start position "
                              << "(2nd column) is not an integer: "
                              << tokens[1]
                              << "; Skipping to the next line.\n";         
                    continue;
                }

                // Look up the reference name.
                refID = samHeader.getReferenceID(tokens[0]);
                if(refID != SamReferenceInfo::NO_REF_ID)
                {
                    // Reference id was found, so add it to the dbsnp
                    dbsnpListPtr->addPosition(refID, position);
                }
        
                tokens.Clear();
                buffer.Clear();
            }
        }
        ifclose(fdbSnp);
    }

    // Read the sam records.
    SamRecord samRecord;

    int numReads = 0;

    //////////////////////
    // Setup in case doing a quality count.
    // Quality histogram.
    const int MAX_QUAL = 126;
    const int START_QUAL = 33;
    uint64_t qualCount[MAX_QUAL+1];
    for(int i = 0; i <= MAX_QUAL; i++)
    {
        qualCount[i] = 0;
    }
    
    const int START_PHRED = 0;
    const int PHRED_DIFF = START_QUAL - START_PHRED;
    const int MAX_PHRED = MAX_QUAL - PHRED_DIFF;
    uint64_t phredCount[MAX_PHRED+1];
    for(int i = 0; i <= MAX_PHRED; i++)
    {
        phredCount[i] = 0;
    }
    
    int refPos = 0;
    Cigar* cigarPtr = NULL;
    char cigarChar = '?';
    // Exclude clips from the qual/phred counts if unmapped reads are excluded.
    bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED;

    //////////////////////////////////
    // When not reading by sections, getNextSection returns true
    // the first time, then false the next time.
    while(getNextSection(samIn))
    {
        // Keep reading records from the file until SamFile::ReadRecord
        // indicates to stop (returns false).
        while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord))
        {
            // Another record was read, so increment the number of reads.
            ++numReads;
            // See if the quality histogram should be genereated.
            if(qual || phred)
            {
                // Get the quality.
                const char* qual = samRecord.getQuality();
                // Check for no quality ('*').
                if((qual[0] == '*') && (qual[1] == 0))
                {
                    // This record does not have a quality string, so no 
                    // quality processing is necessary.
                }
                else
                {
                    int index = 0;
                    cigarPtr = samRecord.getCigarInfo();
                    cigarChar = '?';
                    refPos = samRecord.get0BasedPosition();
                    if(!qualExcludeClips && (cigarPtr != NULL))
                    {
                        // Offset the reference position by any soft clips
                        // by subtracting the queryIndex of this start position.
                        // refPos is now the start position of the clips.
                        refPos -= cigarPtr->getQueryIndex(0);
                    }

                    while(qual[index] != 0)
                    {
                        // Skip this quality if it is clipped and we are skipping clips.
                        if(cigarPtr != NULL)
                        {
                            cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index);
                        }
                        if(qualExcludeClips && Cigar::isClip(cigarChar))
                        {
                            // Skip a clipped quality.
                            ++index;
                            // Increment the position.
                            continue;
                        }

                        if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos))
                        {
                            // We have hit the end of the region, stop processing this
                            // quality string.
                            break;
                        }

                        if(withinRegion && (refPos < myStartPos))
                        {
                            // This position is not in the target.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }

                        // Check for valid quality.
                        if((qual[index] < START_QUAL) || (qual[index] > MAX_QUAL))
                        {
                            if(qual)
                            {
                                std::cerr << "Invalid Quality found: " << qual[index] 
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            if(phred)
                            {
                                std::cerr << "Invalid Phred Quality found: " << qual[index] - PHRED_DIFF
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            // Skip an invalid quality.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }
                        
                        // Increment the count for this quality.
                        ++(qualCount[(int)(qual[index])]);
                        ++(phredCount[(int)(qual[index]) - PHRED_DIFF]);
                        // Update the position if this is found in the reference or a clip.
                        if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                        {
                            ++refPos;
                        }
                        ++index;
                    }
                }
            }

            // Check the next thing to do for the read.
            if((baseQCPtr != NULL) || baseSum)
            {
                // Pileup the bases for this read.
                pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
            }
        }

        // Done with a section, move on to the next one.

        // New section, so flush the pileup.
        pileup.flushPileup();
    }

    // Flush the rest of the pileup.
    if((baseQCPtr != NULL) || baseSum)
    {
        // Pileup the bases.
        pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
        PileupElementBaseQCStats::printSummary();
        ifclose(baseQCPtr);
    }

    std::cerr << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;

    if(basic)
    {
        std::cerr << std::endl;
        samIn.PrintStatistics();
    }

    // Print the quality stats.
    if(qual)
    {
        std::cerr << std::endl;
        std::cerr << "Quality\tCount\n";
        for(int i = START_QUAL; i <= MAX_QUAL; i++)
        {
            std::cerr << i << "\t" << qualCount[i] << std::endl;
        }
    }
    // Print the phred quality stats.
    if(phred)
    {
        std::cerr << std::endl;
        std::cerr << "Phred\tCount\n";
        for(int i = START_PHRED; i <= MAX_PHRED; i++)
        {
            std::cerr << i << "\t" << phredCount[i] << std::endl;
        }
    }

    SamStatus::Status status = samIn.GetStatus();
    if(status == SamStatus::NO_MORE_RECS)
    {
        // A status of NO_MORE_RECS means that all reads were successful.
        status = SamStatus::SUCCESS;
    }

    return(status);
}
Ejemplo n.º 9
0
// main function
int TrimBam::execute(int argc, char ** argv)
{
  SamFile samIn;
  SamFile samOut;
  int numTrimBaseL = 0;
  int numTrimBaseR = 0;
  bool noeof = false;
  bool ignoreStrand = false;
  bool noPhoneHome = false;
  std::string inName = "";
  std::string outName = "";

  if ( argc < 5 ) {
    usage();
    std::cerr << "ERROR: Incorrect number of parameters specified\n";
    return(-1);
  }
  inName = argv[2];
  outName = argv[3];

  static struct option getopt_long_options[] = {
      // Input options
      { "left", required_argument, NULL, 'L'},
      { "right", required_argument, NULL, 'R'},
      { "ignoreStrand", no_argument, NULL, 'i'},
      { "noeof", no_argument, NULL, 'n'},
      { "noPhoneHome", no_argument, NULL, 'p'},
      { "nophonehome", no_argument, NULL, 'P'},
      { "phoneHomeThinning", required_argument, NULL, 't'},
      { "phonehomethinning", required_argument, NULL, 'T'},
      { NULL, 0, NULL, 0 },
  };
  
  int argIndex = 4;
  if(argv[argIndex][0] != '-')
  {
      // This is the number of bases to trim off both sides
      // so convert to a number.
      numTrimBaseL = atoi(argv[argIndex]);
      numTrimBaseR = numTrimBaseL;
      ++argIndex;
  }

  int c = 0;
  int n_option_index = 0;
  // Process any additional parameters
  while ( ( c = getopt_long(argc, argv,
                            "L:R:in", getopt_long_options, &n_option_index) )
          != -1 )
  {
      switch(c) 
      {
          case 'L':
              numTrimBaseL = atoi(optarg);
              break;
          case 'R':
              numTrimBaseR = atoi(optarg);
              break;
          case 'i':
              ignoreStrand = true;
              break;
          case 'n':
              noeof = true;
              break;
          case 'p':
          case 'P':
              noPhoneHome = true;
              break;
          case 't':
          case 'T':
              PhoneHome::allThinning = atoi(optarg);
              break;
          default:
              fprintf(stderr,"ERROR: Unrecognized option %s\n",
                      getopt_long_options[n_option_index].name);
              return(-1);
      }
  }

  if(!noPhoneHome)
  {
      PhoneHome::checkVersion(getProgramName(), VERSION);
  }
  
  if(noeof)
  {
      // Set that the eof block is not required.
      BgzfFileType::setRequireEofBlock(false);
  }

  if ( ! samIn.OpenForRead(inName.c_str()) ) {
      fprintf(stderr, "***Problem opening %s\n",inName.c_str());
    return(-1);
  }

  if(!samOut.OpenForWrite(outName.c_str())) {
    fprintf(stderr, "%s\n", samOut.GetStatusMessage());
    return(samOut.GetStatus());
  }
  
  fprintf(stderr,"Arguments in effect: \n");
  fprintf(stderr,"\tInput file : %s\n",inName.c_str());
  fprintf(stderr,"\tOutput file : %s\n",outName.c_str());
  if(numTrimBaseL == numTrimBaseR)
  {
      fprintf(stderr,"\t#Bases to trim from each side : %d\n", numTrimBaseL);
  }
  else
  {
      fprintf(stderr,"\t#Bases to trim from the left of forward strands : %d\n",
              numTrimBaseL);
      fprintf(stderr,"\t#Bases to trim from the right of forward strands: %d\n",
              numTrimBaseR);
      if(!ignoreStrand)
      {
          // By default, reverse strands are treated the opposite.
          fprintf(stderr,"\t#Bases to trim from the left of reverse strands : %d\n",
                  numTrimBaseR);
          fprintf(stderr,"\t#Bases to trim from the right of reverse strands : %d\n",
                  numTrimBaseL);
      }
      else
      {
          // ignore strand, treating forward & reverse strands the same
          fprintf(stderr,"\t#Bases to trim from the left of reverse strands : %d\n",
                  numTrimBaseL);
          fprintf(stderr,"\t#Bases to trim from the right of reverse strands : %d\n",
                  numTrimBaseR);
      }
  }
 
   // Read the sam header.
   SamFileHeader samHeader;
   if(!samIn.ReadHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Write the sam header.
   if(!samOut.WriteHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());     
   }

   SamRecord samRecord;
   char seq[65536];
   char qual[65536];
   int i, len;

   // Keep reading records until ReadRecord returns false.
   while(samIn.ReadRecord(samHeader, samRecord)) {
     // Successfully read a record from the file, so write it.
     strcpy(seq,samRecord.getSequence());
     strcpy(qual,samRecord.getQuality());

     // Number of bases to trim from the left/right,
     // set based on ignoreStrand flag and strand info.
     int trimLeft = numTrimBaseL;
     int trimRight = numTrimBaseR;
     if(!ignoreStrand)
     {
         if(SamFlag::isReverse(samRecord.getFlag()))
         {
             // We are reversing the reverse reads,
             // so swap the left & right trim counts.
             trimRight = numTrimBaseL;
             trimLeft = numTrimBaseR;
         }
     }

     len = strlen(seq);
     // Do not trim if sequence is '*'
     if ( strcmp(seq, "*") != 0 ) {
       bool qualValue = true;
       if(strcmp(qual, "*") == 0)
       {
           qualValue = false;
       }
       int qualLen = strlen(qual);
       if ( (qualLen != len) && qualValue ) {
         fprintf(stderr,"ERROR: Sequence and Quality have different length\n");
         return(-1);
       }
       if ( len < (trimLeft + trimRight) ) {
         // Read Length is less than the total number of bases to trim,
         // so trim the entire read.
         for(i=0; i < len; ++i) {
           seq[i] = 'N';
           if ( qualValue ) {
             qual[i] = '!';
           }
         }
       }
       else
       {
           // Read Length is larger than the total number of bases to trim,
           // so trim from the left, then from the right.
           for(i=0; i < trimLeft; ++i)
           {
               // Trim the bases from the left.
               seq[i] = 'N';
               if ( qualValue )
               {
                   qual[i] = '!';
               }
           }
           for(i = 0; i < trimRight; i++)
           {
               seq[len-i-1] = 'N';
               if(qualValue)
               {
                   qual[len-i-1] = '!';
               }
           }
       }
       samRecord.setSequence(seq);
       samRecord.setQuality(qual);
     }

     if(!samOut.WriteRecord(samHeader, samRecord)) {
         // Failed to write a record.
       fprintf(stderr, "Failure in writing record %s\n", samOut.GetStatusMessage());
       return(-1);
     }
   }
   
   if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
   {
      // Failed to read a record.
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
   }   
   
   std::cerr << std::endl << "Number of records read = " << 
     samIn.GetCurrentRecordCount() << std::endl;
   std::cerr << "Number of records written = " << 
     samOut.GetCurrentRecordCount() << std::endl;

   if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
   {
     // Failed reading a record.
     return(samIn.GetStatus());
   }

   // Since the reads were successful, return the status based
   samIn.Close();
   samOut.Close();
   return 0;
}
Ejemplo n.º 10
0
int GapInfo::processFile(const char* inputFileName, const char* outputFileName,
                         const char* refFile, bool detailed,
                         bool checkFirst, bool checkStrand)
{
    // Open the file for reading.
    SamFile samIn;
    samIn.OpenForRead(inputFileName);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    SamRecord samRecord;

    GenomeSequence* refPtr = NULL;
    if(strcmp(refFile, "") != 0)
    {
        refPtr = new GenomeSequence(refFile);
    }

    IFILE outFile = ifopen(outputFileName, "w");

    // Map for summary.
    std::map<int, int> gapInfoMap;


    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(samHeader, samRecord))
    {
        uint16_t samFlags = samRecord.getFlag();

        if((!SamFlag::isMapped(samFlags)) || 
           (!SamFlag::isMateMapped(samFlags)) ||
           (!SamFlag::isPaired(samFlags)) ||
           (samFlags & SamFlag::SECONDARY_ALIGNMENT) || 
           (SamFlag::isDuplicate(samFlags)) ||
           (SamFlag::isQCFailure(samFlags)))
        {
            // unmapped, mate unmapped, not paired,
            // not the primary alignment,
            // duplicate, fails vendor quality check 
            continue;
        }

        // No gap info if the chromosome names are different or
        // are unknown.
        int32_t refID = samRecord.getReferenceID();
        if((refID != samRecord.getMateReferenceID()) || (refID == -1))
        {
            continue;
        }

        int32_t readStart = samRecord.get0BasedPosition();
        int32_t mateStart = samRecord.get0BasedMatePosition();

        // If the mate starts first, then the pair was processed by
        // the mate.
        if(mateStart < readStart)
        {
            continue;
        }
        if((mateStart == readStart) && (SamFlag::isReverse(samFlags)))
        {
            // read and mate start at the same position, so 
            // only process the forward strand.
            continue;
        }

        // Process this read pair.
        int32_t readEnd = samRecord.get0BasedAlignmentEnd();
        
        int32_t gapSize = mateStart - readEnd - 1;

        if(detailed)
        {
            // Output the gap info.
            ifprintf(outFile, "%s\t%d\t%d", 
                     samRecord.getReferenceName(), readEnd+1, gapSize);
            
            // Check if it is not the first or if it is not the forward strand.
            if(checkFirst && !SamFlag::isFirstFragment(samFlags))
            {
                ifprintf(outFile, "\tNotFirst");
            }
            if(checkStrand && SamFlag::isReverse(samFlags))
            {
                ifprintf(outFile, "\tReverse");
            }
            ifprintf(outFile, "\n");
        }
        else
        {
            // Summary.
            // Skip reads that are not the forward strand.
            if(SamFlag::isReverse(samFlags))
            {
                // continue
                continue;
            }

            // Forward.
            // Check the reference for 'N's.
            if(refPtr != NULL)
            {
                genomeIndex_t chromStartIndex = 
                    refPtr->getGenomePosition(samRecord.getReferenceName());
                if(chromStartIndex == INVALID_GENOME_INDEX)
                {
                    // Invalid position, so continue to the next one.
                    continue;
                }
                bool skipRead = false;
                for(int i = readEnd + 1; i < mateStart; i++)
                {
                    if((*refPtr)[i] == 'N')
                    {
                        // 'N' in the reference, so continue to the next read.
                        skipRead = true;
                        break;
                    }
                }
                if(skipRead)
                {
                    continue;
                }
            }
            
            // Update the gapInfo.
            gapInfoMap[gapSize]++;
        }
    }

    if(!detailed)
    {
        // Output the summary.
        ifprintf(outFile, "GapSize\tNumPairs\n");
        for(std::map<int,int>::iterator iter = gapInfoMap.begin(); 
            iter != gapInfoMap.end(); iter++)
        {
            ifprintf(outFile, "%d\t%d\n", (*iter).first, (*iter).second);
        }
    }
    

    SamStatus::Status returnStatus = samIn.GetStatus();
    if(returnStatus == SamStatus::NO_MORE_RECS)
    {
        return(SamStatus::SUCCESS);
    }
    return(returnStatus);
}