コード例 #1
0
int ReadReference::execute(int argc, char **argv)
{
    static const int UNSPECIFIED_INT = -1;
    String refFile = "";
    String refName = "";
    int start = UNSPECIFIED_INT;
    int numBases = UNSPECIFIED_INT;
    int end = UNSPECIFIED_INT;
    bool params = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_STRINGPARAMETER("refName", &refName)
        LONG_INTPARAMETER("start", &start)
        LONG_INTPARAMETER("end", &end)
        LONG_INTPARAMETER("numBases", &numBases)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);
    
    if((refName == "") || (start == UNSPECIFIED_INT) || 
       ((end == UNSPECIFIED_INT) && (numBases == UNSPECIFIED_INT)))
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing Required Parameter\n\n";
        return(-1);
    }
    if((end != UNSPECIFIED_INT) && (numBases != UNSPECIFIED_INT))
    {
        usage();
        inputParameters.Status();
        std::cerr << "Only --end or --numBases can be specified\n\n";
        return(-1);
    }
    else if(numBases != UNSPECIFIED_INT)
    {
        end = start + numBases;
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the reference.
    GenomeSequence reference(refFile);

    uint32_t refStart = 
        reference.getGenomePosition(refName.c_str());

    if(refStart == INVALID_GENOME_INDEX)
    {
        std::cerr << "Reference Name: " << refName.c_str()
                  << " not found in the reference file\n"; 
        return(-1);
    }

    std::string refString;
    
    reference.getString(refString, refStart + start, end - start);
    std::cout << refString << std::endl;
    
    return(0);
}
コード例 #2
0
ファイル: Revert.cpp プロジェクト: rtchen/gotcloud
int Revert::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String outFile = "";
    bool cigar = false;
    bool qual = false;
    bool noeof = false;
    bool params = false;
    bool rmBQ = false;
    String rmTags = "";
    myKeepTags = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_STRINGPARAMETER("out", &outFile)
        LONG_PARAMETER("cigar", &cigar)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("keepTags", &myKeepTags)
        LONG_PARAMETER("rmBQ", &rmBQ)
        LONG_STRINGPARAMETER("rmTags", &rmTags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }
    
    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(outFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--out is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    samIn.OpenForRead(inFile);

    // Open the output file for writing.
    SamFile samOut;
    samOut.OpenForWrite(outFile);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    // Write the sam header.
    samOut.WriteHeader(samHeader);

    SamRecord samRecord;

    // Set returnStatus to success.  It will be changed to the
    // failure reason if any of the writes or updates fail.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    // Keep reading records until ReadRecord returns false.
    while(samIn.ReadRecord(samHeader, samRecord))
    {
        // Update the cigar & position.
        if(cigar)
        {
            if(!updateCigar(samRecord))
            {
                // Failed to update the cigar & position.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }
        if(qual)
        {
            if(!updateQual(samRecord))
            {
                // Failed to update the quality.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        if(rmBQ)
        {
            if(!removeBQ(samRecord))
            {
                // Failed to remove BQ.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        if(rmTags != "")
        {
            if(!samRecord.rmTags(rmTags.c_str()))
            {
                // Failed to remove the specified tags.
                fprintf(stderr, "%s\n", samIn.GetStatusMessage());
                returnStatus = samIn.GetStatus();
            }
        }

        // Successfully read a record from the file, so write it.
        if(!samOut.WriteRecord(samHeader, samRecord))
        {
            // Failed to write a record.
            fprintf(stderr, "%s\n", samOut.GetStatusMessage());
            returnStatus = samOut.GetStatus();
        }
    }

    std::cerr << std::endl << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;
    std::cerr << "Number of records written = " << 
        samOut.GetCurrentRecordCount() << std::endl;

    // Since the reads were successful, return the status based
    // on the status of the writes.  If any failed, return
    // their failure status.
    return(returnStatus);
}
コード例 #3
0
ファイル: Convert.cpp プロジェクト: Griffan/bamUtil
/// Copy the records of the --in SAM/BAM file to --out, optionally left
/// shifting indels and translating the written sequence against a reference.
///
/// Options are parsed from argv beginning at index 2:
///   --in / --out  input and output files (both mandatory)
///   --refFile     reference used for the sequence translation
///   --lshift      call shiftIndelsLeft() on each record before writing
///   --recover     try to resync the input stream after a runtime error
///   --noeof       do not require the BGZF EOF block
///   --params      print the parameter settings before running
///   --useBases / --useEquals / --useOrigSeq
///                 sequence translation for the output; BASES and EQUAL are
///                 only applied when --refFile is given, otherwise NONE is
///                 used.
///
/// @param argc  number of entries in argv
/// @param argv  full command line; option parsing starts at argv[2]
/// @return -1 when a mandatory option is missing; SamStatus::SUCCESS when
///         the input was processed to the end (possibly after a successful
///         resync) and all writes succeeded; the status of a failed write;
///         or SamStatus::FAIL_IO when reading stopped on a runtime error
///         that could not be recovered from.
int Convert::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String outFile = "";
    String refFile = "";
    bool lshift = false;
    bool noeof = false;
    bool params = false;

    bool useBases = false;
    bool useEquals = false;
    bool useOrigSeq = false;

    bool recover = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_STRINGPARAMETER("out", &outFile)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_PARAMETER("lshift", &lshift)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("recover", &recover)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("SequenceConversion")
            EXCLUSIVE_PARAMETER("useBases", &useBases)
            EXCLUSIVE_PARAMETER("useEquals", &useEquals)
            EXCLUSIVE_PARAMETER("useOrigSeq", &useOrigSeq)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }
    
    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Check to see if the out file was specified, if not, report an error.
    if(outFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // Out file was not specified but it is mandatory.
        std::cerr << "--out is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Check to see if the ref file was specified.
    // Open the reference.  It is owned by this function and released at
    // the end; the output file only borrows the pointer.
    GenomeSequence* refPtr = NULL;
    if(refFile != "")
    {
        refPtr = new GenomeSequence(refFile);
    }

    // The translations that compare against the reference are only usable
    // when a reference was loaded; everything else writes the sequence as is.
    SamRecord::SequenceTranslation translation;
    if((useBases) && (refPtr != NULL))
    {
        translation = SamRecord::BASES;
    }
    else if((useEquals) && (refPtr != NULL))
    {
        translation = SamRecord::EQUAL;
    }
    else
    {
        // Keep the flag in step with the translation actually used, since
        // it is registered with the parameter list printed by Status().
        useOrigSeq = true;
        translation = SamRecord::NONE;
    }
    
    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    if(recover) samIn.setAttemptRecovery(true);
    samIn.OpenForRead(inFile);

    // Open the output file for writing.
    SamFile samOut;
    samOut.OpenForWrite(outFile);
    samOut.SetWriteSequenceTranslation(translation);
    samOut.SetReference(refPtr);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    // Write the sam header.
    samOut.WriteHeader(samHeader);

    SamRecord samRecord;

    // Set returnStatus to success.  It will be changed to the failure
    // reason if any of the writes fail or if reading has to be abandoned.
    SamStatus::Status returnStatus = SamStatus::SUCCESS;

    // The outer loop only repeats after a successful resync of the input.
    while(1) {
        try {
            // Keep reading records until ReadRecord returns false.
            while(samIn.ReadRecord(samHeader, samRecord))
            {
                // left shift if necessary.
                if(lshift)
                {
                    samRecord.shiftIndelsLeft();
                }

                // Successfully read a record from the file, so write it.
                if(!samOut.WriteRecord(samHeader, samRecord))
                {
                    // Failed to write a record.
                    fprintf(stderr, "%s\n", samOut.GetStatusMessage());
                    returnStatus = samOut.GetStatus();
                }
            }
            break;
        } catch (const std::runtime_error& e) {
            // Caught by const reference so the exception is neither copied
            // nor sliced.
            std::cerr << "Caught runtime error: " << e.what() << "\n";
            if(!recover) {
                std::cerr << "Corrupted BAM file detected - consider using --recover option.\n";
                // The input was not processed to the end, so do not report
                // success; an earlier write failure status is preserved.
                if(returnStatus == SamStatus::SUCCESS)
                {
                    returnStatus = SamStatus::FAIL_IO;
                }
                break;
            }
            std::cerr << "Attempting to resync at next good BGZF block and BAM record.\n";
            // XXX need to resync SamFile stream here
            bool rc = samIn.attemptRecoverySync(checkSignature, SIGNATURE_LENGTH);
            if(rc) {
                std::cerr << "Successful resync - some data lost.\n";
                continue;    // succeeded
            }
            std::cerr << "Failed to re-sync on data stream.\n";
            // Recovery failed, so the rest of the input is lost: report
            // failure unless a write failure is already recorded.
            if(returnStatus == SamStatus::SUCCESS)
            {
                returnStatus = SamStatus::FAIL_IO;
            }
            break;              // failed to resync
        }
    }

    std::cerr << std::endl << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;
    std::cerr << "Number of records written = " << 
        samOut.GetCurrentRecordCount() << std::endl;

    if(refPtr != NULL)
    {
        delete(refPtr);
    }

    // SUCCESS only if the input was read through and every write worked;
    // otherwise the failure status recorded above.
    return(returnStatus);
}
コード例 #4
0
ファイル: VcfConvert.cpp プロジェクト: statgen/vcfUtil
/// Copy the records of the --in VCF file to the --out VCF file.
///
/// Options are parsed from argv[1] onwards (argv[0] is dropped before the
/// parameter list is read):
///   --in / --out  input and output VCF files (both required)
///   --uncompress  open the output with InputFile::DEFAULT instead of the
///                 writer's default open mode
///   --refName     restrict reading to this section via setReadSection()
///   --noeof       do not require the BGZF EOF block
///   --params      print the parameter settings before running
///
/// @param argc  number of entries in argv
/// @param argv  full command line
/// @return -1 when --in or --out is missing, 0 otherwise.
int VcfConvert::execute(int argc, char **argv)
{
    String inputVcf = "";
    String outputVcf = "";
    String refName = "";
    bool uncompress = false;
    bool params = false;
    bool noeof = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_STRINGPARAMETER("out", &outputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_STRINGPARAMETER("refName", &refName)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    // Skip the first argument so parsing starts one entry further in.
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }
    if(outputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--out\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    VcfFileReader inFile;
    VcfFileWriter outFile;
    VcfHeader header;
    
    // Open the input file; this also fills in the header that is handed to
    // the writer below.
    inFile.open(inputVcf, header);

    if(refName != "")
    {
        // Only read the requested section of the input.
        inFile.setReadSection(refName.c_str());
    }

    if(uncompress)
    {
        outFile.open(outputVcf, header, InputFile::DEFAULT);
    }
    else
    {
        // NOTE(review): the two-argument open presumably selects a
        // compressed output by default - confirm against VcfFileWriter.
        outFile.open(outputVcf, header);
    }

    VcfRecord record;
    int numRecords = 0;

    // Copy records one by one, counting how many were read.
    while(inFile.readRecord(record))
    {
        ++numRecords;

        outFile.writeRecord(record);
    }
 
    inFile.close();   

    std::cerr << "NumRecords: " << numRecords << "\n";
    return(0);
}
コード例 #5
0
ファイル: ClipOverlap.cpp プロジェクト: statgen/bamUtil
/// Run the overlap handler over the --in SAM/BAM file and write the result
/// to --out.
///
/// Options are parsed from argv beginning at index 2:
///   --in / --out     input and output files (both mandatory)
///   --storeOrig      2 character tag name passed to storeOrigCigar()
///   --readName       process via handleSortedByReadName() instead of the
///                    coordinate based handleSortedByCoord()
///   --noRNValidate   with --readName, skip the QUERY_NAME sort validation
///   --stats          passed to the overlap handler's keepStats()
///   --overlapsOnly   stored in myOverlapsOnly (legacy name: --clipsOnly)
///   --excludeFlags   converted with AsInteger() into myIntExcludeFlags
///   --unmapped       call markAsUnmapped() on the overlap handler
///   --poolSize       maximum number of records allocated by the pool
///   --poolSkipOverlap stored in myPoolSkipOverlap (legacy: --poolSkipClip)
///   --noeof          do not require the BGZF EOF block
///   --params         print the parameter settings before running
///
/// The input is processed once per step reported by the overlap handler;
/// the output file is only opened for the last step.
///
/// @param argc  number of entries in argv
/// @param argv  full command line; option parsing starts at argv[2]
/// @return -1 for invalid/missing options or a failed handler allocation;
///         SamStatus::SUCCESS on a clean run; SamStatus::NO_MORE_RECS when
///         the run completed but record pool failures occurred; otherwise
///         the failure status of the step that stopped processing.
int ClipOverlap::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String outFile = "";
    String storeOrig = "";
    bool readName = false;
    bool noRNValidate = false;
    bool stats = false;
    int poolSize = DEFAULT_POOL_SIZE;
    bool unmapped = false;
    bool noeof = false;
    bool params = false;
    String excludeFlags = "0xF0C";

    // TODO, cleanup legacy parameters
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_STRINGPARAMETER("out", &outFile)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_STRINGPARAMETER("storeOrig", &storeOrig)
        LONG_PARAMETER("readName", &readName)
        LONG_PARAMETER ("noRNValidate", &noRNValidate)
        LONG_PARAMETER ("stats", &stats)
        LONG_PARAMETER ("overlapsOnly", &myOverlapsOnly)
        LONG_STRINGPARAMETER ("excludeFlags", &excludeFlags)
        LONG_PARAMETER("unmapped", &unmapped)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Coordinate Processing Optional Parameters")
        LONG_INTPARAMETER("poolSize", &poolSize)
        LONG_PARAMETER("poolSkipOverlap", &myPoolSkipOverlap)
        LONG_PHONEHOME(VERSION)
        BEGIN_LEGACY_PARAMETERS()
        LONG_PARAMETER ("clipsOnly", &myOverlapsOnly)
        LONG_PARAMETER("poolSkipClip", &myPoolSkipOverlap)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Check to see if the out file was specified, if not, report an error.
    if(outFile == "")
    {
        printUsage(std::cerr);
        inputParameters.Status();
        // Out file was not specified but it is mandatory.
        std::cerr << "--out is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // The tag name is optional, but when given it must be 2 characters.
    if((storeOrig.Length() != 0) && (storeOrig.Length() != 2))
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "--storeOrig tag name must be 2 characters.\n";
        return(-1);
    }

    myOverlapHandler = new OverlapClipLowerBaseQual();
    if(myOverlapHandler == NULL)
    {
        printUsage(std::cerr);
        inputParameters.Status();
        std::cerr << "Failed to allocate the overlap handler\n";
        return(-1);
    }

    if(unmapped)
    {
        myOverlapHandler->markAsUnmapped();
    }

    // Setup the overlap handler.
    myOverlapHandler->keepStats(stats);
    if(storeOrig.Length() != 0)
    {
        myOverlapHandler->storeOrigCigar(storeOrig);
    }

    myIntExcludeFlags = excludeFlags.AsInteger();

    if(params)
    {
        inputParameters.Status();
    }

    // For each step process the file.
    // Open the files & read/write the sam header.
    SamStatus::Status runStatus = SamStatus::SUCCESS;
    for(int i = 1; i <= myOverlapHandler->numSteps(); i++)
    {
        // Open the file for reading.
        mySamHeader.resetHeader();
        SamFile samIn(inFile, SamFile::READ, &mySamHeader);
        SamFile* samOutPtr = NULL;
        // Check if writing, if so, open the output file.
        // Only the final step produces output.
        if(i == myOverlapHandler->numSteps())
        {
            samOutPtr = new SamFile(outFile, SamFile::WRITE, &mySamHeader);
        }

        if(readName)
        {
            if(!noRNValidate)
            {
                samIn.setSortedValidation(SamFile::QUERY_NAME);
            }
            runStatus = handleSortedByReadName(samIn, samOutPtr);
        }
        else
        {
            // Coordinate sorted, so work with the pools.
            samIn.setSortedValidation(SamFile::COORDINATE);
            myPool.setMaxAllocatedRecs(poolSize);

            // Reset the number of failures
            myNumMateFailures = 0;
            myNumPoolFail = 0;
            myNumPoolFailNoHandle = 0;
            myNumPoolFailHandled = 0;
            myNumOutOfOrder = 0;

            // Run by coordinate
            if(samOutPtr != NULL)
            {
                // Setup the output buffer for writing.
                SamCoordOutput outputBuffer(myPool);
                outputBuffer.setOutputFile(samOutPtr, &mySamHeader);
                runStatus = handleSortedByCoord(samIn, &outputBuffer);

                // Cleanup the output buffer.
                if(!outputBuffer.flushAll())
                {
                    std::cerr << "ERROR: Failed to flush the output buffer\n";
                    runStatus = SamStatus::FAIL_IO;
                }
            }
            else
            {
                runStatus = handleSortedByCoord(samIn, NULL);
            }
        }

        // Close the input file, it will be reopened if there are 
        // multiple steps.
        samIn.Close();
        // Release the output file before looking at the step's status so
        // that it is closed and freed on the failure path as well.
        if(samOutPtr != NULL)
        {
            samOutPtr->Close();
            delete samOutPtr;
            samOutPtr = NULL;
        }

        // Stop at the first failing step.
        if(runStatus != SamStatus::SUCCESS)
        {
            break;
        }
    }

    // Done processing.
    // Print Stats
    myOverlapHandler->printStats();

    if(myNumMateFailures != 0)
    {
        std::cerr << "WARNING: did not find expected overlapping mates for "
                  << myNumMateFailures << " records." << std::endl;
    }
    if(myNumPoolFail != 0)
    {
        // Had to skip clipping some records due to running out of
        // memory and not being able to wait for the mate.
        std::cerr << "WARNING: " << myNumPoolFail 
                  << " record pool failures\n";
        if(myNumPoolFailNoHandle != 0)
        {
            std::cerr << "Due to hitting the max record poolSize, skipped handling " 
                      << myNumPoolFailNoHandle << " records." << std::endl;
        }
        if(myNumPoolFailHandled != 0)
        {
            std::cerr << "Due to hitting the max record poolSize, default handled " 
                      << myNumPoolFailHandled << " records." << std::endl;
        }
        if(myNumOutOfOrder != 0)
        {
            std::cerr << "WARNING: Resulting File out of Order by " 
                      << myNumOutOfOrder << " records.\n";
        }
    }

    if(runStatus == SamStatus::SUCCESS)
    {
        if(myNumPoolFail == 0)
        {
            std::cerr << "Completed ClipOverlap Successfully.\n";
        }
        else
        {
            // Completed, but flag the pool failures through a non-success
            // return value.
            runStatus = SamStatus::NO_MORE_RECS;
            std::cerr << "Completed ClipOverlap with WARNINGS.\n";
        }
    }
    else
    {
        std::cerr << "Failed to complete ClipOverlap.\n";
    }
    return(runStatus);
}
コード例 #6
0
ファイル: Bam2FastQ.cpp プロジェクト: zorankiki/gotcloud
int Bam2FastQ::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool readName = false;
    String refFile = "";
    String firstOut = "";
    String secondOut = "";
    String unpairedOut = "";

    bool interleave = false;
    bool noeof = false;
    bool gzip = false;
    bool params = false;

    myOutBase = "";
    myNumMateFailures = 0;
    myNumPairs = 0;
    myNumUnpaired = 0;
    mySplitRG = false;
    myQField = "";
    myNumQualTagErrors = 0;
    myReverseComp = true;
    myRNPlus = false;
    myFirstRNExt = DEFAULT_FIRST_EXT;
    mySecondRNExt = DEFAULT_SECOND_EXT;
    myCompression = InputFile::DEFAULT;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("readName", &readName)
        LONG_PARAMETER("splitRG", &mySplitRG)
        LONG_STRINGPARAMETER("qualField", &myQField)
        LONG_PARAMETER("merge", &interleave)
        LONG_STRINGPARAMETER("refFile", &refFile)
        LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt)
        LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt)
        LONG_PARAMETER("rnPlus", &myRNPlus)
        LONG_PARAMETER("noReverseComp", &myReverseComp)
        LONG_PARAMETER("gzip", &gzip)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional OutputFile Names")
        LONG_STRINGPARAMETER("outBase", &myOutBase)
        LONG_STRINGPARAMETER("firstOut", &firstOut)
        LONG_STRINGPARAMETER("secondOut", &secondOut)
        LONG_STRINGPARAMETER("unpairedOut", &unpairedOut)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    if(gzip)
    {
        myCompression = InputFile::GZIP;
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both interleaved & secondOut since secondOut would be N/A.
    if(interleave && !secondOut.IsEmpty())
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n";
        return(-1);
    }

    // Cannot specify both splitRG & firstOut/secondOut/unpairedOut
    // since it needs a different file for each RG.
    if(mySplitRG && (!firstOut.IsEmpty() || 
                   !secondOut.IsEmpty() || !unpairedOut.IsEmpty()))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & --firstOut/--secondOut/--unpairedOut.\n";
        std::cerr << "Use --outBase instead.\n";
        return(-1);
    }
    // Cannot specify splitRG & output to stdout.
    if(mySplitRG && (myOutBase[0] == '-'))
    {
        usage();
        inputParameters.Status();
        std::cerr << "ERROR: Cannot specify --splitRG & write to stdout.\n";
        return(-1);
    }

    // Check to see if the out file was specified, if not, generate it from
    // the input filename.
    if(myOutBase == "")
    {
        // Just remove the extension from the input filename.
        int extStart = inFile.FastFindLastChar('.');
        if(extStart <= 0)
        {
            myOutBase = inFile;
        }
        else
        {
            myOutBase = inFile.Left(extStart);
        }
    }

    if(mySplitRG)
    {
        std::string fqList = myOutBase.c_str();
        fqList += ".list";
        myFqList = ifopen(fqList.c_str(), "w");
        ifprintf(myFqList, "MERGE_NAME\tFASTQ1\tFASTQ2\tRG\n");
    }

    // Check to see if the first/second/single-ended were specified and
    // if not, set them.
    myFirstFileNameExt = "_1.fastq";
    mySecondFileNameExt = "_2.fastq";
    myUnpairedFileNameExt = ".fastq";
    if(interleave)
    {
        myFirstFileNameExt = "_interleaved.fastq";
        myFirstFileNameExt = "_interleaved.fastq";
    }
    getFileName(firstOut, myFirstFileNameExt);
    getFileName(secondOut, mySecondFileNameExt);
    getFileName(unpairedOut, myUnpairedFileNameExt);

    if(params)
    {
        inputParameters.Status();
    }

    // Open the files for reading/writing.
    // Open prior to opening the output files,
    // so if there is an error, the outputs don't get created.
    SamFile samIn;
    samIn.OpenForRead(inFile, &mySamHeader);
    // Skip non-primary reads.
    samIn.SetReadFlags(0, 0x0100);

    // Open the output files if not splitting RG
    if(!mySplitRG)
    {
        myUnpairedFile = ifopen(unpairedOut, "w", myCompression);

        // Only open the first file if it is different than an already opened file.
        if(firstOut != unpairedOut)
        {
            myFirstFile = ifopen(firstOut, "w", myCompression);
        }
        else
        {
            myFirstFile = myUnpairedFile;
        }

        // If it is interleaved or the 2nd file is not a new name, set it appropriately.
        if(interleave || secondOut == firstOut)
        {
            mySecondFile = myFirstFile;
        }
        else if(secondOut == unpairedOut)
        {
            mySecondFile = myUnpairedFile;
        }
        else
        {
            mySecondFile = ifopen(secondOut, "w", myCompression);
        }
    
        if(myUnpairedFile == NULL)
        {
            std::cerr << "Failed to open " << unpairedOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(myFirstFile == NULL)
        {
            std::cerr << "Failed to open " << firstOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
        if(mySecondFile == NULL)
        {
            std::cerr << "Failed to open " << secondOut
                      << " so can't convert bam2FastQ.\n";
            return(-1);
        }
    }

    if((readName) || (strcmp(mySamHeader.getSortOrder(), "queryname") == 0))
    {
        readName = true;
    }
    else
    {
        // defaulting to coordinate sorted.
        samIn.setSortedValidation(SamFile::COORDINATE);
    }

    // Setup the '=' translation if the reference was specified.
    if(!refFile.IsEmpty())
    {
        GenomeSequence* refPtr = new GenomeSequence(refFile);
        samIn.SetReadSequenceTranslation(SamRecord::BASES);
        samIn.SetReference(refPtr);
    }

    SamRecord* recordPtr;
    int16_t samFlag;

    SamStatus::Status returnStatus = SamStatus::SUCCESS;
    while(returnStatus == SamStatus::SUCCESS)
    {
        recordPtr = myPool.getRecord();
        if(recordPtr == NULL)
        {
            // Failed to allocate a new record.
            throw(std::runtime_error("Failed to allocate a new SAM/BAM record"));
        }
        if(!samIn.ReadRecord(mySamHeader, *recordPtr))
        {
            // Failed to read a record.
            returnStatus = samIn.GetStatus();
            continue;
        }

        // Have a record.  Check to see if it is a pair or unpaired read.
        samFlag = recordPtr->getFlag();
        if(SamFlag::isPaired(samFlag))
        {
            if(readName)
            {
                handlePairedRN(*recordPtr);
            }
            else
            {
                handlePairedCoord(*recordPtr);
            }
        }
        else
        {
            ++myNumUnpaired;
            writeFastQ(*recordPtr, myUnpairedFile,
                       myUnpairedFileNameExt);
        }
    }

    // Flush All
    cleanUpMateMap(0, true);

    if(returnStatus == SamStatus::NO_MORE_RECS)
    {
        returnStatus = SamStatus::SUCCESS;
    }

    samIn.Close();
    closeFiles();
    
    // Output the results
    std::cerr << "\nFound " << myNumPairs << " read pairs.\n";
    std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n";
    if(myNumMateFailures != 0)
    {
        std::cerr << "Failed to find mates for " << myNumMateFailures
                  << " reads, so they were written as unpaired\n"
                  << "  (not included in either of the above counts).\n";
    }
    if(myNumQualTagErrors != 0)
    {
        std::cerr << myNumQualTagErrors << " records did not have tag "
                  << myQField.c_str() << " or it was invalid, so the quality field was used for those records.\n";
    }

    return(returnStatus);
}
コード例 #7
0
ファイル: VcfSplit.cpp プロジェクト: statgen/vcfUtil
/// Split a VCF file into one output VCF per chromosome.
///
/// Command-line parameters (parsing starts at argv[1]):
///   --in          input VCF file (required)
///   --obase       base name of the output files (required); "." is appended
///   --uncompress  open the outputs with InputFile::DEFAULT and do not add
///                 the ".gz" suffix
///   --refName     if non-empty, passed to VcfFileReader::setReadSection
///   --noeof       do not require the BGZF EOF block
///   --params      print the parameter settings
///
/// Each output is named "<obase>.chr<CHROM>.vcf" (plus ".gz" unless
/// --uncompress is given); the "chr" prefix is only added when the
/// chromosome name does not already start with it.
///
/// @param argc  number of entries in argv.
/// @param argv  command-line arguments.
/// @return 0 after processing, -1 if a required parameter is missing.
int VcfSplit::execute(int argc, char **argv)
{
    String inputVcf = "";
    String outputVcfBase = "";
    String refName = "";
    bool uncompress = false;
    bool params = false;
    bool noeof = false;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_STRINGPARAMETER("obase", &outputVcfBase)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_STRINGPARAMETER("refName", &refName)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }
    if(outputVcfBase == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--obase\", a required parameter.\n\n";
        return(-1);
    }
    outputVcfBase += ".";

    if(params)
    {
        inputParameters.Status();
    }

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    VcfFileReader inFile;
    // One writer per chromosome, created on first use and owned by this map.
    std::map<std::string, VcfFileWriter*> outFiles;
    VcfHeader header;
    
    // Open the file.
    inFile.open(inputVcf, header);

    if(refName != "")
    {
        inFile.setReadSection(refName.c_str());
    }

    VcfRecord record;
    int numRecords = 0;

    std::string prevChr = "";
    std::string chr = "";
    VcfFileWriter* outFilePtr = 0;
    std::string outName = "";
    while(inFile.readRecord(record))
    {
        ++numRecords;

        chr = record.getChromStr();

        // Only look the writer up again when the chromosome changes.
        if((outFilePtr == 0) || (chr != prevChr))
        {
            // Remember the chromosome so consecutive records on the same
            // chromosome reuse outFilePtr without another map lookup.
            prevChr = chr;

            outFilePtr = outFiles[chr];
            if(outFilePtr == 0)
            {
                // chr not in outFile list, so create and open its writer.
                outFilePtr = new VcfFileWriter();
                outFiles[chr] = outFilePtr;
                outName = outputVcfBase.c_str();
                if(chr.substr(0,3) != "chr")
                {
                    outName += "chr";
                }
                outName += chr + ".vcf";
                if(uncompress)
                {
                    outFilePtr->open(outName.c_str(), header, InputFile::DEFAULT);
                }
                else
                {
                    outName += ".gz";
                    outFilePtr->open(outName.c_str(), header);
                }
            }
        }
        outFilePtr->writeRecord(record);
    }
 
    inFile.close();   

    // Close and release every writer that was allocated above.
    for (std::map<std::string,VcfFileWriter*>::iterator it = outFiles.begin();
         it != outFiles.end(); ++it)
    {
        if(it->second != 0)
        {
            it->second->close();
            delete it->second;
            it->second = 0;
        }
    }
  

    std::cerr << "NumRecords: " << numRecords << "\n";
    return(0);
}
コード例 #8
0
ファイル: VcfConsensus.cpp プロジェクト: statgen/vcfUtil
// Adds one to the counter that matches the genotype string: num00 for "0/0",
// num01 for "0/1" or "1/0", num11 for "1/1".  Any other genotype string
// leaves all three counters unchanged.
static void tallyGenotypeClass(const std::string& genotype,
                               uint64_t& num00,
                               uint64_t& num01,
                               uint64_t& num11)
{
    if(genotype == "0/0")
    {
        ++num00;
    }
    else if((genotype == "0/1") || (genotype == "1/0"))
    {
        ++num01;
    }
    else if(genotype == "1/1")
    {
        ++num11;
    }
}

/// Build a majority-vote consensus of the GT field across three VCF files.
///
/// Command-line parameters (parsing starts at argv[1]):
///   --in1, --in2, --in3  the three input VCF files (all required)
///   --out                output VCF file (required)
///   --uncompress         open the output with InputFile::DEFAULT
///   --params             print the parameter settings
///
/// Only samples whose name appears in all three headers are kept.  For each
/// record of --in1, the matching position is searched for in --in2 and --in3
/// with findPos(); records that are missing from either file, or whose
/// ref/alt strings differ, are skipped.  For every kept sample, GT in the
/// --in1 record is left alone when it agrees with at least one other file,
/// replaced by the --in2 value when only files 2 and 3 agree, and set to
/// "./." when no two files agree.  Summary counts are written to std::cerr.
///
/// @param argc  number of entries in argv.
/// @param argv  command-line arguments.
/// @return 0 after processing, -1 if a required parameter is missing.
int VcfConsensus::execute(int argc, char ** argv)
{
    String vcfName1;
    String vcfName2;
    String vcfName3;
    String outputFileName;
    bool uncompress = false;
    bool params = false;

    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in1", &vcfName1)
        LONG_STRINGPARAMETER("in2", &vcfName2)
        LONG_STRINGPARAMETER("in3", &vcfName3)
        LONG_STRINGPARAMETER("out", &outputFileName)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_PARAMETER("uncompress", &uncompress)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    std::string gtField = "GT";

    VcfFileReader vcf1;
    VcfFileReader vcf2;
    VcfFileReader vcf3;
    VcfHeader header1;
    VcfHeader header2;
    VcfHeader header3;
    VcfRecord record1;
    VcfRecord record2;
    VcfRecord record3;
    VcfRecordGenotype* genotypeInfoPtr1 = NULL;
    VcfRecordGenotype* genotypeInfoPtr2 = NULL;
    VcfRecordGenotype* genotypeInfoPtr3 = NULL;
    
    unsigned int numMissing2 = 0;
    unsigned int numMissing3 = 0;
    unsigned int numMismatchRefAlt = 0;
    unsigned int numMissingGT1 = 0;
    // Each kind of per-record message is printed at most this many times;
    // the remainder is reported as a "Suppressed" count at the end.
    const unsigned int myMaxErrors = 4;

    // Check that the required parameters were set.
    if(vcfName1 == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in1\", a required parameter.\n\n";
        return(-1);
    }
    if(vcfName2 == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in2\", a required parameter.\n\n";
        return(-1);
    }
    if(vcfName3 == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in3\", a required parameter.\n\n";
        return(-1);
    }
    if(outputFileName == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--out\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    
    // Open the files.
    vcf1.open(vcfName1, header1);
    vcf2.open(vcfName2, header2);
    vcf3.open(vcfName3, header3);

    // Setup the sample name maps.
    // sample2Indices[k] / sample3Indices[k] hold the index in vcf2 / vcf3 of
    // the k-th sample of vcf1 that is kept.
    int numSamples = header1.getNumSamples();
    std::vector<int> sample2Indices;
    std::vector<int> sample3Indices;
    std::vector<int> removeIndices;
    int numSamplesSkipped1 = 0;
    int numSamplesSkipped2 = 0;
    int numSamplesSkipped3 = 0;
    for(int i = 0; i < numSamples; i++)
    {
        int sm2Index = header2.getSampleIndex(header1.getSampleName(i));
        int sm3Index = header3.getSampleIndex(header1.getSampleName(i));
        // Keep this sample only if its name is in both vcf2 and vcf3.
        if((sm2Index != -1) && (sm3Index != -1))
        {
            sample2Indices.push_back(sm2Index);
            sample3Indices.push_back(sm3Index);
        }
        else
        {
            // Sample not found in all three vcfs.
            removeIndices.push_back(i);
            ++numSamplesSkipped1;
        }
    }
    // Remove samples not found in all 3 vcfs from header1.
    // Remove them in reverse order so they are removed from the end of the
    // header first (earlier indices stay valid while removing).
    VcfSubsetSamples subset1;
    subset1.init(header1, true);
    for(std::vector<int>::reverse_iterator removeIter = removeIndices.rbegin();
        removeIter != removeIndices.rend(); ++removeIter)
    {
        subset1.addExcludeSample(header1.getSampleName(*removeIter));
        header1.removeSample(*removeIter);
    }

    // Set numSamples to the new number of samples in header1.
    numSamples = header1.getNumSamples();

    // Calculate the number of samples skipped for files 2 & 3.
    numSamplesSkipped2 = header2.getNumSamples() - sample2Indices.size();
    numSamplesSkipped3 = header3.getNumSamples() - sample3Indices.size();

    if(numSamplesSkipped1 > 0)
    {
        std::cerr << "Skipping " << numSamplesSkipped1 << " samples from --in1\n";
    }
    if(numSamplesSkipped2 > 0)
    {
        std::cerr << "Skipping " << numSamplesSkipped2 << " samples from --in2\n";
    }
    if(numSamplesSkipped3 > 0)
    {
        std::cerr << "Skipping " << numSamplesSkipped3 << " samples from --in3\n";
    }

    VcfFileWriter outputVcf;
    // Open and write the header
    if(uncompress)
    {
        outputVcf.open(outputFileName, header1, InputFile::DEFAULT);
    }
    else
    {
        outputVcf.open(outputFileName, header1);
    }

    const char* chrom1 = NULL;
    int pos1 = UNSET_POS;

    // Read the first record from vcf2 & vcf3.
    vcf2.readRecord(record2);
    vcf3.readRecord(record3);

    bool newChrom = true;
    // Deliberately not static: the previous chromosome must start empty on
    // every call so state from an earlier call cannot affect newChrom.
    std::string prevChrom = "";

    uint64_t numAllMatch = 0;
    uint64_t num1Match2Only = 0;
    uint64_t num1Match3Only = 0;
    uint64_t num2Match3Only = 0;
    uint64_t numNoMatches = 0;

    uint64_t numAllMatch00 = 0;
    uint64_t num1Match2Only00 = 0;
    uint64_t num1Match3Only00 = 0;
    uint64_t num2Match3Only00 = 0;

    uint64_t numAllMatch01 = 0;
    uint64_t num1Match2Only01 = 0;
    uint64_t num1Match3Only01 = 0;
    uint64_t num2Match3Only01 = 0;

    uint64_t numAllMatch11 = 0;
    uint64_t num1Match2Only11 = 0;
    uint64_t num1Match3Only11 = 0;
    uint64_t num2Match3Only11 = 0;

    // Loop through vcf1.
    while(vcf1.readRecord(record1, &subset1))
    {
        chrom1 = record1.getChromStr();
        pos1 = record1.get1BasedPosition();

        if(strcmp(chrom1, prevChrom.c_str()) == 0)
        {
            newChrom = false;
        }
        else
        {
            prevChrom = chrom1;
            newChrom = true;
        }

        // Both files are always searched (even if the first search fails)
        // so that record2 and record3 both advance for this position.
        bool found = true;
        if(!findPos(newChrom, chrom1, pos1, record2, vcf2))
        {
            // Failed to find the position, continue to the next position
            if(++numMissing2 <= myMaxErrors)
            {
                std::cerr << "Failed to find " << chrom1 << ":" << pos1 
                          << " in " << vcfName2 << ", so skipping this pos\n";
            }
            found = false;
        }
        
        if(!findPos(newChrom, chrom1, pos1, record3, vcf3))
        {
            // Failed to find the position, continue to the next position
            if(++numMissing3 <= myMaxErrors)
            {
                std::cerr << "Failed to find " << chrom1 << ":" << pos1 
                          << " in " << vcfName3 << ", so skipping this pos\n";
            }
            found = false;
        }

        if(found == false)
        {
            continue;
        }

        // Found the position in all files.
        
        // Validate that the reference & alternate alleles are the same.
        const char* ref1 = record1.getRefStr();
        const char* alt1 = record1.getAltStr();
        if((strcmp(ref1, record2.getRefStr()) != 0) ||
           (strcmp(ref1, record3.getRefStr()) != 0) ||
           (strcmp(alt1, record2.getAltStr()) != 0) ||
           (strcmp(alt1, record3.getAltStr()) != 0))
        {
            if(++numMismatchRefAlt <= myMaxErrors)
            {
                std::cerr << "Mismatching ref/alt found at " << chrom1 << ":" << pos1 << ", so skipping this pos\n";
            }
            continue;
        }

        // Get the genotype information for each.
        genotypeInfoPtr1 = &(record1.getGenotypeInfo());
        genotypeInfoPtr2 = &(record2.getGenotypeInfo());
        genotypeInfoPtr3 = &(record3.getGenotypeInfo());

        // Loop through all the samples in vcf1.
        // Get the Genotype Information.
        for(int i = 0; i < numSamples; i++)
        {
            const std::string* genotypeVal1 = genotypeInfoPtr1->getString(gtField, i);
            const std::string* genotypeVal2 = genotypeInfoPtr2->getString(gtField, sample2Indices[i]);
            const std::string* genotypeVal3 = genotypeInfoPtr3->getString(gtField, sample3Indices[i]);
            // Need to make sure the field was found.
            if(genotypeVal1 == NULL)
            {
                // GT not found in the first record, so just continue.
                if(++numMissingGT1 <= myMaxErrors)
                {
                    std::cerr << "Missing GT for " << header1.getSampleName(i) << " in " << vcfName1 << "\n";
                }
                continue;
            }

            if(isSame(genotypeVal1, genotypeVal2))
            {
                // genotypeVal1 is majority, so make no change.
                if(isSame(genotypeVal1, genotypeVal3))
                {
                    ++numAllMatch;
                    tallyGenotypeClass(*genotypeVal1, numAllMatch00,
                                       numAllMatch01, numAllMatch11);
                }
                else
                {
                    ++num1Match2Only;
                    tallyGenotypeClass(*genotypeVal1, num1Match2Only00,
                                       num1Match2Only01, num1Match2Only11);
                }
            }
            else if(isSame(genotypeVal1, genotypeVal3))
            {
                // genotypeVal1 is majority, so make no change.
                ++num1Match3Only;
                tallyGenotypeClass(*genotypeVal1, num1Match3Only00,
                                   num1Match3Only01, num1Match3Only11);
            }
            else if((genotypeVal2 != NULL) &&
                    isSame(genotypeVal2, genotypeVal3))
            {
                // genotypeVal2 is majority, so change genotypeVal1.
                // The NULL check guards the dereferences below; how isSame
                // treats NULL arguments is not visible here.
                genotypeInfoPtr1->setString(gtField, i, *genotypeVal2);
                ++num2Match3Only;
                tallyGenotypeClass(*genotypeVal2, num2Match3Only00,
                                   num2Match3Only01, num2Match3Only11);
            }
            else
            {
                // None are the same so set to "./."
                genotypeInfoPtr1->setString(gtField, i, "./.");
                ++numNoMatches;
            }
        } // loop back to vcf1 samples.

        // Write this record.
        outputVcf.writeRecord(record1);
    } // loop back to next vcf1 record.

    std::cerr << "\n";
    if(numMissing2 > myMaxErrors)
    {
        std::cerr << "Suppressed "
                  << numMissing2 - myMaxErrors
                  << " errors about skipped positions because they were not in "
                  << vcfName2
                  << "\n";
    }

    if(numMissing3 > myMaxErrors)
    {
        std::cerr << "Suppressed "
                  << numMissing3 - myMaxErrors
                  << " errors about skipped positions because they were not in "
                  << vcfName3
                  << "\n";
    }

    if(numMismatchRefAlt > myMaxErrors)
    {
        std::cerr << "Suppressed "
                  << numMismatchRefAlt - myMaxErrors
                  << " errors about mismatched ref/alt positions\n";
    }

    if(numMissingGT1 > myMaxErrors)
    {
        std::cerr << "Suppressed "
                  << numMissingGT1 - myMaxErrors
                  << " errors about missing GT for "
                  << vcfName1
                  << "\n";
    }
    std::cerr << "\n";
    // Output the stats.
    std::cerr << "File1 = " << vcfName1 << std::endl;
    std::cerr << "File2 = " << vcfName2 << std::endl;
    std::cerr << "File3 = " << vcfName3 << std::endl;
    std::cerr << "\nType\tTotal\t0/0\t0/1|1/0\t1/1\n";
    std::cerr << "AllMatched" 
              << "\t" << numAllMatch
              << "\t" << numAllMatch00 
              << "\t" << numAllMatch01 
              << "\t" << numAllMatch11 << std::endl;
    std::cerr << "1matched2"
              << "\t" << num1Match2Only 
              << "\t" << num1Match2Only00 
              << "\t" << num1Match2Only01 
              << "\t" << num1Match2Only11 << std::endl;
    std::cerr << "1matched3"
              << "\t" << num1Match3Only 
              << "\t" << num1Match3Only00 
              << "\t" << num1Match3Only01 
              << "\t" << num1Match3Only11 << std::endl;
    std::cerr << "2matched3"
              << "\t" << num2Match3Only
              << "\t" << num2Match3Only00 
              << "\t" << num2Match3Only01 
              << "\t" << num2Match3Only11 << std::endl;
    std::cerr << "NoneMatched\t" << numNoMatches << std::endl;

    return(0);
}
コード例 #9
0
ファイル: Main.cpp プロジェクト: rtchen/gotcloud
// main function of verifyBamID
int execute(int argc, char** argv) {
  printf("verifyBamID %s -- verify identity and purity of sequence data\n"
	 "(c) 2010-2014 Hyun Min Kang, Goo Jun, and Goncalo Abecasis\n\n", VERSION);

  VerifyBamIDArgs args;
  ParameterList pl;

  BEGIN_LONG_PARAMETERS(longParameters)
    LONG_PARAMETER_GROUP("Input Files")
    LONG_STRINGPARAMETER("vcf",&args.sVcfFile)
    LONG_STRINGPARAMETER("bam",&args.sBamFile)
    LONG_STRINGPARAMETER("subset",&args.sSubsetInds)
    LONG_STRINGPARAMETER("smID",&args.sSMID)

    LONG_PARAMETER_GROUP("VCF analysis options")
    LONG_DOUBLEPARAMETER("genoError",&args.genoError)
    LONG_DOUBLEPARAMETER("minAF",&args.minAF)
    LONG_DOUBLEPARAMETER("minCallRate",&args.minCallRate)

    LONG_PARAMETER_GROUP("Individuals to compare with chip data")
    EXCLUSIVE_PARAMETER("site",&args.bSiteOnly)
    EXCLUSIVE_PARAMETER("self",&args.bSelfOnly)
    EXCLUSIVE_PARAMETER("best",&args.bFindBest)

    LONG_PARAMETER_GROUP("Chip-free optimization options")
    EXCLUSIVE_PARAMETER("free-none",&args.bFreeNone)
    EXCLUSIVE_PARAMETER("free-mix",&args.bFreeMixOnly)
    EXCLUSIVE_PARAMETER("free-refBias",&args.bFreeRefBiasOnly)
    EXCLUSIVE_PARAMETER("free-full",&args.bFreeFull)

    LONG_PARAMETER_GROUP("With-chip optimization options")
    EXCLUSIVE_PARAMETER("chip-none",&args.bChipNone)
    EXCLUSIVE_PARAMETER("chip-mix",&args.bChipMixOnly)
    EXCLUSIVE_PARAMETER("chip-refBias",&args.bChipRefBiasOnly)
    EXCLUSIVE_PARAMETER("chip-full",&args.bChipFull)

    LONG_PARAMETER_GROUP("BAM analysis options")
    LONG_PARAMETER("ignoreRG",&args.bIgnoreRG)
    LONG_PARAMETER("ignoreOverlapPair",&args.bIgnoreOverlapPair)
    LONG_PARAMETER("noEOF",&args.bNoEOF)
    LONG_PARAMETER("precise",&args.bPrecise)
    LONG_INTPARAMETER("minMapQ",&args.minMapQ)
    LONG_INTPARAMETER("maxDepth",&args.maxDepth)
    LONG_INTPARAMETER("minQ",&args.minQ)
    LONG_INTPARAMETER("maxQ",&args.maxQ)
    LONG_DOUBLEPARAMETER("grid",&args.grid)

    LONG_PARAMETER_GROUP("Modeling Reference Bias")
    LONG_DOUBLEPARAMETER("refRef",&args.pRefRef)
    LONG_DOUBLEPARAMETER("refHet",&args.pRefHet)
    LONG_DOUBLEPARAMETER("refAlt",&args.pRefAlt)

    LONG_PARAMETER_GROUP("Output options")
    LONG_STRINGPARAMETER("out",&args.sOutFile)
    LONG_PARAMETER("verbose",&args.bVerbose)
    LONG_PHONEHOME(VERSION)
  END_LONG_PARAMETERS();

  pl.Add(new LongParameters("Available Options",longParameters));
  pl.Read(argc, argv);
  pl.Status();

  // check the validity of input files
  if ( args.sVcfFile.IsEmpty() ) {
    error("--vcf [vcf file] required");
  }

  if ( args.sBamFile.IsEmpty() ) {
    error("--bam [bam file] is required");
  }

  if ( args.sOutFile.IsEmpty() ) {
    error("--out [output prefix] is required");
  }
  Logger::gLogger = new Logger((args.sOutFile + ".log").c_str(), args.bVerbose);

  if ( ! ( args.bSiteOnly || args.bSelfOnly || args.bFindBest ) ) {
    warning("--self option was autotomatically turned on by default. Specify --best option if you wanted to check across all possible samples in the VCF");
    args.bSelfOnly = true;
  }

  if ( ( args.maxDepth > 20 ) && ( !args.bPrecise ) ) {
    warning("--precise option is not turned on at --maxDepth %d : may be prone to precision errors",args.maxDepth);
  }

  if ( ( args.bChipRefBiasOnly ) && ( !args.bSelfOnly ) ) {
    error("--self must be set for --chip-refBias to work. Skipping..");
  }

  // check timestamp
  time_t t;
  time(&t);
  Logger::gLogger->writeLog("Analysis started on %s",ctime(&t));

  // load arguments
  VerifyBamID vbid(&args);

  // load input VCF and BAM files
  Logger::gLogger->writeLog("Opening Input Files");
  vbid.loadFiles(args.sBamFile.c_str(), args.sVcfFile.c_str());

  // Check which genotype-free method is used
  if ( args.bFreeNone ) {  // if no genotype-free mode is tested. skip it
    // do nothing for genotype-free estimation
    Logger::gLogger->writeLog("Skipping chip-free estimation of sample mixture");
  }
  else if ( args.bFreeMixOnly ) { // only mixture is estimated.
    // genotype-free method
    Logger::gLogger->writeLog("Performing chip-free estimation of sample mixture at fixed reference bias parameters (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt);

    // scan across multiple readgroups
    for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) {
      VerifyBamID::mixLLK mix(&vbid);
      mix.OptimizeLLK(rg);
      Logger::gLogger->writeLog("Optimal per-sample fMix = %lf, LLK0 = %lf, LLK1 = %lf\n",mix.fMix,mix.llk0,mix.llk1);
      vbid.mixOut.llk0s[rg+1] = mix.llk0;
      vbid.mixOut.llk1s[rg+1] = mix.llk1;
      vbid.mixOut.fMixs[rg+1] = mix.fMix;
    }

    //vbid.mixRefHet = 0.5;
    //vbid.mixRefAlt = 0.00;
  }
  else if ( args.bFreeRefBiasOnly ) {
    Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias without sample mixture");
    for(int rg=-1; rg < vbid.nRGs - (int)args.bIgnoreRG; ++rg) {
      VerifyBamID::refBiasMixLLKFunc myFunc(&vbid, rg);
      AmoebaMinimizer myMinimizer;
      Vector startingPoint(2);
      startingPoint[0] = 0;      // pRefHet = 0.5
      startingPoint[1] = -4.595; // pRefAlt = 0.01
      myMinimizer.func = &myFunc;
      myMinimizer.Reset(2);
      myMinimizer.point = startingPoint;
      myMinimizer.Minimize(1e-6);
      double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]);
      double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]);
      Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf at readGroup %d",pRefHet,pRefAlt,myMinimizer.fmin,rg);
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

      vbid.mixOut.llk0s[rg+1] = myFunc.llk0;
      vbid.mixOut.llk1s[rg+1] = myFunc.llk1;
      vbid.mixOut.refHets[rg+1] = myFunc.pRefHet;
      vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt;
    }
  }
  else if ( args.bFreeFull ) {
    Logger::gLogger->writeLog("Performing chip-free estimation of reference-bias and sample mixture together");
    for(int rg = -1; rg < vbid.nRGs - args.bIgnoreRG; ++rg) {
      VerifyBamID::fullMixLLKFunc myFunc(&vbid, rg);
      AmoebaMinimizer myMinimizer;
      Vector startingPoint(3);
      startingPoint[0] = -3.91;  // start with fMix = 0.01
      startingPoint[1] = 0;      // pRefHet = 0.5
      startingPoint[2] = -4.595; // pRefAlt = 0.01
      myMinimizer.func = &myFunc;
      myMinimizer.Reset(3);
      myMinimizer.point = startingPoint;
      myMinimizer.Minimize(1e-6);
      double fMix = VerifyBamID::invLogit(myMinimizer.point[0]);
      if ( fMix > 0.5 ) 
	fMix = 1.-fMix;
      double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]);
      double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]);
      Logger::gLogger->writeLog("Optimal per-sample fMix = %lf\n",fMix);
      Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

      vbid.mixOut.llk0s[rg+1] = myFunc.llk0;
      vbid.mixOut.llk1s[rg+1] = myFunc.llk1;
      vbid.mixOut.fMixs[rg+1] = myFunc.fMix;
      vbid.mixOut.refHets[rg+1] = myFunc.pRefHet;
      vbid.mixOut.refAlts[rg+1] = myFunc.pRefAlt;
    }
  }
  Logger::gLogger->writeLog("calculating depth distribution");  
  vbid.calculateDepthDistribution(args.maxDepth, vbid.mixOut);

  Logger::gLogger->writeLog("finished calculating depth distribution");  

  std::vector<int> bestInds(vbid.nRGs+1,-1);
  std::vector<int> selfInds(vbid.nRGs+1,-1);

  if ( args.bChipNone ) {
    // do nothing
    Logger::gLogger->writeLog("Skipping with-chip estimation of sample mixture");
  }
  else if ( args.bChipMixOnly ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of sample mixture at fixed reference bias parameter (%lf, %lf, %lf)",args.pRefRef,args.pRefHet,args.pRefAlt);
    
    for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      double maxIBD = -1;
      VerifyBamID::ibdLLK ibd(&vbid);
      for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) {
	double fIBD = ibd.OptimizeLLK(i, rg);
	Logger::gLogger->writeLog("Comparing with individual %s.. Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(),fIBD, ibd.llk0, ibd.llk1, rg);
	if ( maxIBD < fIBD ) {
	  bestInds[rg+1] = i;
	  vbid.bestOut.llk0s[rg+1] = ibd.llk0;
	  vbid.bestOut.llk1s[rg+1] = ibd.llk1;
	  vbid.bestOut.fMixs[rg+1] = 1-ibd.fIBD;
	  maxIBD = ibd.fIBD;
	}

	if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) {
	  selfInds[rg+1] = i;
	  vbid.selfOut.llk0s[rg+1] = ibd.llk0;
	  vbid.selfOut.llk1s[rg+1] = ibd.llk1;
	  vbid.selfOut.fMixs[rg+1] = 1-ibd.fIBD;
	}
      }

      if ( bestInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD);
	vbid.calculateDepthByGenotype(bestInds[rg+1],rg,vbid.bestOut);
      }

      if ( selfInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]);
	vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut);
      }
    }
  }
  else if ( args.bChipRefBiasOnly ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias without sample mixture");
    if ( args.bSelfOnly ) {
      for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
	VerifyBamID::refBiasIbdLLKFunc myFunc(&vbid, rg);
	AmoebaMinimizer myMinimizer;
	Vector startingPoint(2);
	startingPoint[0] = 0;      // pRefHet = 0.5
	startingPoint[1] = -4.595; // pRefAlt = 0.01
	myMinimizer.func = &myFunc;
	myMinimizer.Reset(2);
	myMinimizer.point = startingPoint;
	myMinimizer.Minimize(1e-6);
	double pRefHet = VerifyBamID::invLogit(myMinimizer.point[0]);
	double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[1]);
	Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
	//vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);

	vbid.selfOut.llk0s[rg+1] = myFunc.llk0;
	vbid.selfOut.llk1s[rg+1] = myFunc.llk1;
	vbid.selfOut.refHets[rg+1] = myFunc.pRefHet;
	vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt;
	vbid.calculateDepthByGenotype(0,rg,vbid.selfOut);
      }
    }
    else {
      Logger::gLogger->warning("--self must be set for --chip-refBias to work. Skipping..");
    }
  }
  else if ( args.bChipFull ) {
    Logger::gLogger->writeLog("Performing with-chip estimation of reference-bias and sample mixture together");
    for(int rg=-1; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      double maxIBD = -1;

      for(int i=0; i < (int)vbid.pGenotypes->indids.size(); ++i) {
	VerifyBamID::fullIbdLLKFunc myFunc(&vbid,i,rg);
	AmoebaMinimizer myMinimizer;
	Vector startingPoint(3);
	startingPoint[0] = 3.91;  // start with fIBD = 0.99
	startingPoint[1] = 0;      // pRefHet = 0.5
	startingPoint[2] = -4.595; // pRefAlt = 0.01
	myMinimizer.func = &myFunc;

	myFunc.indIdx = i;
	myMinimizer.Reset(3);
	myMinimizer.point = startingPoint;
	myMinimizer.Minimize(1e-6);
	double fIBD = VerifyBamID::invLogit(myMinimizer.point[0]);
	double pRefHet = VerifyBamID::invLogit(myMinimizer.point[1]);
	double pRefAlt = VerifyBamID::invLogit(myMinimizer.point[2]);

	Logger::gLogger->writeLog("Comparing with individual %s.. Optimal fIBD = %lf, LLK0 = %lf, LLK1 = %lf for readgroup %d",vbid.pGenotypes->indids[i].c_str(), fIBD, myFunc.llk0, myFunc.llk1, rg);
	//Logger::gLogger->writeLog("Optimal per-sample fIBD = %lf, ",fIBD);
	Logger::gLogger->writeLog("Reference Bias Estimated as ( Pr[refBase|HET] = %lf, Pr[refBase|ALT] = %lf ) with LLK = %lf",pRefHet,pRefAlt,myMinimizer.fmin);
	if ( maxIBD < fIBD ) {
	  bestInds[rg+1] = i;
	  maxIBD = fIBD;
	  vbid.bestOut.llk0s[rg+1] = myFunc.llk0;
	  vbid.bestOut.llk1s[rg+1] = myFunc.llk1;
	  vbid.bestOut.fMixs[rg+1] = 1.-myFunc.fIBD;
	  vbid.bestOut.refHets[rg+1] = myFunc.pRefHet;
	  vbid.bestOut.refAlts[rg+1] = myFunc.pRefAlt;
	}

	if ( ( (rg < 0) && (vbid.pPile->sBamSMID == vbid.pGenotypes->indids[i] ) ) || ( ( rg >= 0 ) && ( vbid.pPile->vsSMIDs[rg] == vbid.pGenotypes->indids[i]) ) ) {
	  selfInds[rg+1] = i;
	  vbid.selfOut.llk0s[rg+1] = myFunc.llk0;
	  vbid.selfOut.llk1s[rg+1] = myFunc.llk1;
	  vbid.selfOut.fMixs[rg+1] = 1.-myFunc.fIBD;
	  vbid.selfOut.refHets[rg+1] = myFunc.pRefHet;
	  vbid.selfOut.refAlts[rg+1] = myFunc.pRefAlt;
	  vbid.calculateDepthByGenotype(i, rg, vbid.selfOut);
	}
      }
      //vbid.setRefBiasParams(1.0, pRefHet, pRefAlt);
      if ( bestInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Best Matching Individual is %s with IBD = %lf",vbid.pGenotypes->indids[bestInds[rg+1]].c_str(),maxIBD);
	vbid.calculateDepthByGenotype(bestInds[rg+1], rg, vbid.bestOut);
      }

      if ( selfInds[rg+1] >= 0 ) {
	Logger::gLogger->writeLog("Self Individual is %s with IBD = %lf",vbid.pGenotypes->indids[selfInds[rg+1]].c_str(),vbid.selfOut.fMixs[rg+1]);
	vbid.calculateDepthByGenotype(selfInds[rg+1],rg,vbid.selfOut);
      }
    }
  }

  // PRINT OUTPUT FILE - ".selfSM"
  // [SEQ_ID]  : SAMPLE ID in the sequence file
  // [CHIP_ID] : SAMPLE ID in the chip file (NA if not available)
  // [#SNPS] : Number of markers evaluated
  // [#READS]   : Number of reads evaluated
  // [AVG_DP]   : Mean depth
  // [FREEMIX]  : Chip-free estimated alpha (% MIX in 0-1 scale), NA if unavailable
  // [FREELK1]  : Chip-free log-likelihood at estimated alpha
  // [FREELK0]  : Chip-free log-likelihood at 0% contamination
  // [CHIPIBD]  : With-chip estimated alpha (% MIX in 0-1 scale)
  // [CHIPLK1]  : With-chip log-likelihood at estimated alpha
  // [CHIPLK0]  : With-chip log-likelihood at 0% contamination
  // [DPREF]    : Depth at reference site in the chip
  // [RDPHET]   : Relative depth at HET site in the chip
  // [RDPALT]   : Relative depth at HOMALT site in the chip
  // [FREE_RF]  : Pr(Ref|Ref) site estimated without chip data
  // [FREE_RH]  : Pr(Ref|Het) site estimated without chip data
  // [FREE_RA]  : Pr(Ref|Alt) site estimated without chip data
  // [CHIP_RF]  : Pr(Ref|Ref) site estimated with chip data
  // [CHIP_RH]  : Pr(Ref|Het) site estimated with chip data
  // [CHIP_RA]  : Pr(Ref|Alt) site estimated with chip data
  // [DPREF]    : Depth at reference alleles
  // [RDPHET]   : Relative depth at heterozygous alleles
  // [RDPALT]   : Relative depth at hom-alt alleles

  String selfSMFN = args.sOutFile + ".selfSM";
  String bestSMFN = args.sOutFile + ".bestSM";
  String selfRGFN = args.sOutFile + ".selfRG";
  String bestRGFN = args.sOutFile + ".bestRG";
  String dpSMFN = args.sOutFile + ".depthSM";
  String dpRGFN = args.sOutFile + ".depthRG";

  IFILE selfSMF = ifopen(selfSMFN,"wb");
  IFILE bestSMF = (args.bFindBest ? ifopen(bestSMFN,"wb") : NULL);
  IFILE selfRGF = (args.bIgnoreRG ? NULL : ifopen(selfRGFN,"wb"));
  IFILE bestRGF = (args.bFindBest && !args.bIgnoreRG) ? ifopen(bestRGFN,"wb") : NULL;

  IFILE dpSMF = ifopen(dpSMFN,"wb");
  IFILE dpRGF = (args.bIgnoreRG ? NULL : ifopen(dpRGFN,"wb"));
  if ( selfSMF == NULL ) {
    Logger::gLogger->error("Cannot write to %s",selfSMF);
  }
  if ( args.bFindBest && ( bestSMF == NULL ) ) {
    Logger::gLogger->error("Cannot write to %s",bestSMF);
  }
  if ( dpSMF == NULL ) {
    Logger::gLogger->error("Cannot write to %s",dpSMF);
  }

  ifprintf(dpSMF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n");
  int nCumMarkers = 0;
  for(int i=args.maxDepth; i >= 0; --i) {
    nCumMarkers += vbid.mixOut.depths[i];
    ifprintf(dpSMF,"ALL\t%d\t%d\t%.5lf\t%.5lf\n",i, vbid.mixOut.depths[i],(double) vbid.mixOut.depths[i]/(double)vbid.nMarkers,(double)nCumMarkers/(double)vbid.nMarkers);
  }
  ifclose(dpSMF);


  if ( dpRGF != NULL ) {
    ifprintf(dpRGF,"#RG\tDEPTH\t#SNPs\t%%SNPs\t%%CUMUL\n");
    for(int rg=0; rg < (vbid.nRGs - (int)args.bIgnoreRG); ++rg) {
      const char* rgID = vbid.pPile->vsRGIDs[rg].c_str();

      int nMarkers = 0;
      for(int i=args.maxDepth; i >= 0; --i) {
	nMarkers += vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i];
      }

      nCumMarkers = 0;
      for(int i=args.maxDepth; i >= 0; --i) {
	int d = vbid.mixOut.depths[(rg+1)*(args.maxDepth+1) + i];
	nCumMarkers += d;
	ifprintf(dpRGF,"%s\t%d\t%d\t%.5lf\t%.5lf\n",rgID,i,d,(double)d/(double)vbid.nMarkers,(double)nCumMarkers/(double)nMarkers);
      }
    }
    ifclose(dpRGF);
  }

  const char* headers[] = {"#SEQ_ID","RG","CHIP_ID","#SNPS","#READS","AVG_DP","FREEMIX","FREELK1","FREELK0","FREE_RH","FREE_RA","CHIPMIX","CHIPLK1","CHIPLK0","CHIP_RH","CHIP_RA","DPREF","RDPHET","RDPALT"};
  int nheaders = sizeof(headers)/sizeof(headers[0]);

  for(int i=0; i < nheaders; ++i) { ifprintf(selfSMF,"%s%s",i>0 ? "\t" : "",headers[i]); }
  ifprintf(selfSMF,"\n");
  ifprintf(selfSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str());
  ifprintf(selfSMF,"\t%s",selfInds[0] >= 0 ? vbid.pGenotypes->indids[selfInds[0]].c_str() : "NA");
  ifprintf(selfSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers);
  if ( args.bFreeNone ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA"); }
  else if ( args.bFreeMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); }
  else if ( args.bFreeRefBiasOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
  else if ( args.bFreeFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
  else { error("Invalid option in handling bFree"); }

  if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
  else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[0],vbid.selfOut.llk1s[0],vbid.selfOut.llk0s[0],(double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else if ( args.bChipMixOnly ) { ifprintf(selfSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else if ( args.bChipFull ) { ifprintf(selfSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[0], vbid.selfOut.llk1s[0], vbid.selfOut.llk0s[0], vbid.selfOut.refHets[0], vbid.selfOut.refAlts[0], (double)vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[1], (double)vbid.selfOut.numReads[2]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[2], (double)vbid.selfOut.numReads[3]*vbid.selfOut.numGenos[1]/vbid.selfOut.numReads[1]/vbid.selfOut.numGenos[3]); }
  else { error("Invalid option in handling bChip"); }
  ifprintf(selfSMF,"\n");
  ifclose(selfSMF);

  if ( bestSMF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(bestSMF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(bestSMF,"\n");
    ifprintf(bestSMF,"%s\tALL",vbid.pPile->sBamSMID.c_str());
    ifprintf(bestSMF,"\t%s",bestInds[0] >= 0 ? vbid.pGenotypes->indids[bestInds[0]].c_str() : "NA");
    ifprintf(bestSMF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[0],(double)vbid.mixOut.numReads[0]/(double)vbid.nMarkers);
    if ( args.bFreeNone ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA"); }
    else if ( args.bFreeMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0]); }
    else if ( args.bFreeRefBiasOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
    else if ( args.bFreeFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[0],vbid.mixOut.llk1s[0],vbid.mixOut.llk0s[0],vbid.mixOut.refHets[0],vbid.mixOut.refAlts[0]); }
    else { error("Invalid option in handling bFree"); }
    
    if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestSMF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
    else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[0],vbid.bestOut.llk1s[0],vbid.bestOut.llk0s[0],(double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else if ( args.bChipMixOnly ) { ifprintf(bestSMF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else if ( args.bChipFull ) { ifprintf(bestSMF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[0], vbid.bestOut.llk1s[0], vbid.bestOut.llk0s[0], vbid.bestOut.refHets[0], vbid.bestOut.refAlts[0], (double)vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[1], (double)vbid.bestOut.numReads[2]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[2], (double)vbid.bestOut.numReads[3]*vbid.bestOut.numGenos[1]/vbid.bestOut.numReads[1]/vbid.bestOut.numGenos[3]); }
    else { error("Invalid option in handling bChip"); }
    ifprintf(bestSMF,"\n");
    ifclose(bestSMF);
  }

  if ( selfRGF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(selfRGF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(selfRGF,"\n");
    for(int rg=0; rg < vbid.nRGs; ++rg) {
      ifprintf(selfRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str());
      ifprintf(selfRGF,"\t%s",bestInds[rg] >= 0 ? vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA");
      ifprintf(selfRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]);
      if ( args.bFreeNone ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bFreeMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); }
      else if ( args.bFreeRefBiasOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else if ( args.bFreeFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else { error("Invalid option in handling bFree"); }
      
      if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(selfRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipMixOnly ) { ifprintf(selfRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipFull ) { ifprintf(selfRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.selfOut.fMixs[rg+1], vbid.selfOut.llk1s[rg+1], vbid.selfOut.llk0s[rg+1], vbid.selfOut.refHets[rg+1], vbid.selfOut.refAlts[rg+1], (double)vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+1], (double)vbid.selfOut.numReads[(rg+1)*4+2]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+2], (double)vbid.selfOut.numReads[(rg+1)*4+3]*vbid.selfOut.numGenos[(rg+1)*4+1]/vbid.selfOut.numReads[(rg+1)*4+1]/vbid.selfOut.numGenos[(rg+1)*4+3]); }
      else { error("Invalid option in handling bChip"); }
      ifprintf(selfRGF,"\n");
    }
    ifclose(selfRGF);
  }

  if ( bestRGF != NULL ) {
    for(int i=0; i < nheaders; ++i) { ifprintf(bestRGF,"%s%s",i>0 ? "\t" : "",headers[i]); }
    ifprintf(bestRGF,"\n");
    for(int rg=0; rg < vbid.nRGs; ++rg) {
      ifprintf(bestRGF,"%s\t%s",vbid.pPile->sBamSMID.c_str(),vbid.pPile->vsRGIDs[rg].c_str());
      ifprintf(bestRGF,"\t%s",bestInds[rg] >= 0 ? vbid.pGenotypes->indids[bestInds[rg]].c_str() : "NA");
      ifprintf(bestRGF,"\t%d\t%d\t%.2lf",vbid.nMarkers,vbid.mixOut.numReads[(rg+1)*4],(double)vbid.mixOut.numReads[(rg+1)*4]/(double)vbid.mixOut.numGenos[(rg+1)*4]);
      if ( args.bFreeNone ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bFreeMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1]); }
      else if ( args.bFreeRefBiasOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else if ( args.bFreeFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf",vbid.mixOut.fMixs[rg+1],vbid.mixOut.llk1s[rg+1],vbid.mixOut.llk0s[rg+1],vbid.mixOut.refHets[rg+1],vbid.mixOut.refAlts[rg+1]); }
      else { error("Invalid option in handling bFree"); }
      
      if ( args.bChipNone || bestInds[0] < 0 ) { ifprintf(bestRGF,"\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tNA"); }
      else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\tNA\tNA\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipMixOnly ) { ifprintf(bestRGF,"\tNA\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf",vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else if ( args.bChipFull ) { ifprintf(bestRGF,"\t%.5lf\t%.2lf\t%.2lf\t%.5lf\t%.5lf\t%.3lf\t%.4lf\t%.4lf", vbid.bestOut.fMixs[rg+1], vbid.bestOut.llk1s[rg+1], vbid.bestOut.llk0s[rg+1], vbid.bestOut.refHets[rg+1], vbid.bestOut.refAlts[rg+1], (double)vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+1], (double)vbid.bestOut.numReads[(rg+1)*4+2]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+2], (double)vbid.bestOut.numReads[(rg+1)*4+3]*vbid.bestOut.numGenos[(rg+1)*4+1]/vbid.bestOut.numReads[(rg+1)*4+1]/vbid.bestOut.numGenos[(rg+1)*4+3]); }
      else { error("Invalid option in handling bChip"); }
      ifprintf(bestRGF,"\n");
    }
    ifclose(bestRGF);
  }
  
  time(&t);
  Logger::gLogger->writeLog("Analysis finished on %s",ctime(&t));

  return 0;
}
コード例 #10
0
ファイル: VcfMac.cpp プロジェクト: statgen/vcfUtil
int VcfMac::execute(int argc, char **argv)
{
    String inputVcf = "";
    int minAC = -1;
    String sampleSubset = "";
    String filterList = "";
    bool params = false;

    IntervalTree<int> regions;
    std::vector<int> intersection;
    
    // Read in the parameters.    
    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inputVcf)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_STRINGPARAMETER("sampleSubset", &sampleSubset)
        LONG_INTPARAMETER("minAC", &minAC)
        LONG_STRINGPARAMETER("filterList", &filterList)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));
    
    inputParameters.Read(argc-1, &(argv[1]));
    
    // Check that all files were specified.
    if(inputVcf == "")
    {
        usage();
        inputParameters.Status();
        std::cerr << "Missing \"--in\", a required parameter.\n\n";
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the two input files.
    VcfFileReader inFile;
    VcfHeader header;
    VcfRecord record;

    // Open the file
    if(sampleSubset.IsEmpty())
    {
        inFile.open(inputVcf, header);        
    }
    else
    {
        inFile.open(inputVcf, header, sampleSubset, NULL, NULL);
    }
    
    // Add the discard rule for minor allele count.
    if(minAC >= 0)
    {
        inFile.addDiscardMinMinorAlleleCount(minAC, NULL);
    }
    
    if(!filterList.IsEmpty())
    {
        // Open the filter list.
        IFILE regionFile = ifopen(filterList, "r");
        String regionLine;
        StringArray regionColumn;
        int start;
        int end;
        int intervalVal = 1;
        if(regionFile == NULL)
        {
            std::cerr << "Failed to open " << filterList 
                      << ", so keeping all positions\n";
            filterList.Clear();
        }
        else
        {
            while( regionFile->isOpen() && !regionFile->ifeof())
            {
                // Read the next interval
                regionLine.Clear();
                regionLine.ReadLine(regionFile);
                if(regionLine.IsEmpty())
                {
                    // Nothing on this line, continue to the next.
                    continue;
                }
                regionColumn.ReplaceColumns(regionLine, ' ');
                if(regionColumn.Length() != 2)
                {
                    std::cerr << "Improperly formatted region line: " 
                              << regionLine << "; skipping to the next line.\n";
                    continue;
                }
                // Convert the columns to integers.
                if(!regionColumn[0].AsInteger(start))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, start position "
                              << "(1st column) is not an integer: "
                              << regionColumn[0]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                if(!regionColumn[1].AsInteger(end))
                {
                    // The start position (1st column) is not an integer.
                    std::cerr << "Improperly formatted region line, end position "
                              << "(2nd column) is not an integer: "
                              << regionColumn[1]
                              << "; Skipping to the next line.\n";
                    continue;
                }
                // Add 1-based inclusive intervals.
                regions.add(start,end, intervalVal);
            }
        }
    }


    int numReadRecords = 0;

    while( inFile.readRecord(record))
    {
        if(!filterList.IsEmpty())
        {
            // Check if the region should be kept.
            intersection.clear();
            regions.get_intersecting_intervals(record.get1BasedPosition(), intersection);
            
            if(intersection.empty())
            {
                // not in the interval, so continue to the next record.
                continue;
            }
        }

        ++numReadRecords;

        // Loop through the number of possible alternates.
        unsigned int numAlts = record.getNumAlts();
        int minAlleleCount = -1;
        int curAlleleCount = 0;
        int totalAlleleCount = 0;
        for(unsigned int i = 0; i <= numAlts; i++)
        {
            curAlleleCount = record.getAlleleCount(i);
            if((minAlleleCount == -1) ||
               (curAlleleCount < minAlleleCount))
            {
                minAlleleCount = curAlleleCount;
            }
            totalAlleleCount += curAlleleCount;
        }
        if(totalAlleleCount != 0)
        {
            double maf = (double)minAlleleCount/totalAlleleCount;
            std::cout << record.getIDStr()
                      << "\t" << minAlleleCount
                      << "\t" << maf << "\n";
        }
    }
    
    inFile.close();

    //    std::cerr << "\n\t# Records: " << numReadRecords << "\n";

    // return success.
    return(0);
}
コード例 #11
0
// Dump the reference information from specified SAM/BAM file.
int DumpRefInfo::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool noeof = false;
    bool printRecordRefs = false;
    bool params = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("printRecordRefs", &printRecordRefs)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    samIn.OpenForRead(inFile);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
    int numReferences = refInfo.getNumEntries();
    
    for(int i = 0; i < numReferences; i++)
    {
        std::cout << "Reference Index " << i;
        std::cout << "; Name: " << refInfo.getReferenceName(i)
                  << std::endl;
    }
    if(numReferences == 0)
    {
        // There is no reference info.
        std::cerr << "The header contains no reference information.\n";
    }

    // If we are to print the references as found in the records, loop
    // through reading the records.
    if(printRecordRefs)
    {
        SamRecord samRecord;

        // Track the prev name/id.
        std::string prevName = "";
        int prevID = -2;
        int recCount = 0; // track the num records in a ref.
        // Keep reading records until ReadRecord returns false.
        while(samIn.ReadRecord(samHeader, samRecord))
        {
            const char* name = samRecord.getReferenceName();
            int id = samRecord.getReferenceID();
            if((strcmp(name, prevName.c_str()) != 0) || (id != prevID))
            {
                if(prevID != -2)
                {
                    std::cout << "\tRef ID: " << prevID
                              << "\tRef Name: " << prevName 
                              << "\tNumRecs: " << recCount
                              << std::endl;
                }
                recCount = 0;
                prevID = id;
                prevName = name;
            }
            ++recCount;
        }
        // Print the last index.
        if(prevID != -2)
        {
            std::cout << "\tRef ID: " << prevID
                      << "\tRef Name: " << prevName 
                      << "\tNumRecs: " << recCount
                      << std::endl;
        }
    }
    return(SamStatus::SUCCESS);
}