Ejemplo n.º 1
0
// Sum the base qualities of all positions in the read that mismatch the
// reference.
//
// NOTE: Only positions where the reference and read both have bases that
//       are different and not 'N' are considered mismatches.
//
//   record            - read whose bases/qualities are examined.
//   refSequence       - reference the read is compared against.
//   defaultQualityInt - quality used for a mismatching base whose quality
//                       decodes to BaseUtilities::UNKNOWN_QUALITY_INT.
//
// Returns the sum of the (phred) qualities of the mismatching bases.
uint32_t SamFilter::sumMismatchQuality(SamRecord& record, 
                                       GenomeSequence& refSequence,
                                       uint8_t defaultQualityInt)
{
    // Accumulate directly in the return type so there is no
    // signed-to-unsigned conversion on return.
    uint32_t mismatchQual = 0;

    SamQuerySeqWithRefIter sequenceIter(record, refSequence);

    SamSingleBaseMatchInfo baseMatchInfo;
    while(sequenceIter.getNextMatchMismatch(baseMatchInfo))
    {
        if(baseMatchInfo.getType() != SamSingleBaseMatchInfo::MISMATCH)
        {
            // Only mismatches contribute to the sum.
            continue;
        }

        // Got a mismatch, get the associated quality.
        const char readQualityChar = 
            record.getQuality(baseMatchInfo.getQueryIndex());
        uint8_t readQualityInt = 
            BaseUtilities::getPhredBaseQuality(readQualityChar);

        if(readQualityInt == BaseUtilities::UNKNOWN_QUALITY_INT)
        {
            // Quality was not specified, so use the configured setting.
            readQualityInt = defaultQualityInt;
        }
        mismatchQual += readQualityInt;
    }

    return(mismatchQual);
}
Ejemplo n.º 2
0
// Finds the total base quality of a read.
//
// Each quality character is decoded as (character - 33) and added to the
// total only if it is at least myMinQual.
// Returns 0 when the record's quality string is "*".
int Dedup_LowMem::getBaseQuality(SamRecord & record) {
    const char* baseQualities = record.getQuality();
    const int readLength = record.getReadLength();
    int quality = 0;
    if(strcmp(baseQualities, "*") == 0)
    {
        return(0);
    }
    // Stop at the string terminator as well as at the read length so a
    // quality string shorter than the read is never read past its end.
    for(int i = 0; (i < readLength) && (baseQualities[i] != '\0'); ++i) {
        const int q = static_cast<int>(baseQualities[i]) - 33;
        if ( q >= myMinQual ) quality += q;
    }
    return quality;
}
Ejemplo n.º 3
0
// Write one record to the given FASTQ file, then release the record back to
// myPool.  If filePtr is NULL nothing is written and the record is NOT
// released.
//
//   samRec      - record to write.
//   filePtr     - file to write to.
//   readNameExt - text appended to the read name on the '@' line (and on the
//                 '+' line when myRNPlus is set).
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr,
                             const char* readNameExt)
{
    // Static storage: these persist from one call to the next.
    static int16_t recFlag;
    static std::string bases;
    static String quals;

    if(NULL == filePtr)
    {
        return;
    }

    recFlag = samRec.getFlag();
    const char* name = samRec.getReadName();
    bases = samRec.getSequence();
    quals = samRec.getQuality();

    const bool flip = SamFlag::isReverse(recFlag) && myReverseComp;
    if(!flip)
    {
        // Written as stored: only force the bases to upper case.
        for(std::string::iterator base = bases.begin();
            base != bases.end(); ++base)
        {
            *base = (char)toupper(*base);
        }
    }
    else
    {
        // Reverse strand and reverse complementing is enabled:
        // reverse complement the bases and reverse the qualities to match.
        BaseUtilities::reverseComplement(bases);
        quals.Reverse();
    }

    if(!myRNPlus)
    {
        // Bare '+' separator line.
        ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", name, readNameExt,
                 bases.c_str(), quals.c_str());
    }
    else
    {
        // Repeat the read name (and extension) on the '+' line.
        ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", name, readNameExt,
                 bases.c_str(), name, readNameExt, quals.c_str());
    }

    // Done with the record, so return it to the pool.
    myPool.releaseRecord(&samRec);
}
Ejemplo n.º 4
0
// Write one record to a FASTQ file, then release the record back to myPool.
//
// When mySplitRG is set the passed-in filePtr is replaced: the output file is
// looked up in myOutFastqs under "<RG tag value><fileNameExt>" and is opened
// (and cached) the first time that key is seen.  For a newly opened file whose
// extension is not mySecondFileNameExt, a line describing the file(s) is also
// written to myFqList.
//
//   samRec      - record to write.
//   filePtr     - file to write to when mySplitRG is not set.
//   fileNameExt - extension used to build the per-read-group file name.
//   readNameExt - text appended to the read name on the '@' line (and on the
//                 '+' line when myRNPlus is set).
//
// Throws std::runtime_error if a per-read-group file cannot be opened or if
// there is no file to write to.
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr,
                           const std::string& fileNameExt, const char* readNameExt)
{
    // Static storage: these persist from one call to the next.
    // The flag is unsigned to match how SamRecord flags are used elsewhere.
    static uint16_t flag;
    static std::string sequence;
    static String quality;
    static std::string rg;
    static std::string rgFastqExt;
    static std::string rgListStr;
    static std::string fileName;
    static std::string fq2;
    if(mySplitRG)
    {
        rg = samRec.getString("RG").c_str();
        rgFastqExt = rg + fileNameExt;

        OutFastqMap::iterator it;
        it = myOutFastqs.find(rgFastqExt);
        if(it == myOutFastqs.end())
        {
            // New file.
            fileName = myOutBase.c_str();
            if(rg != "")
            {
                fileName += '.';
            }
            else
            {
                // No read group on this record; "." is used as the
                // read group id for the header lookups below.
                rg = ".";
            }
            fileName += rgFastqExt;
            filePtr = ifopen(fileName.c_str(), "w", myCompression);
            if(filePtr == NULL)
            {
                // Report the real cause here instead of caching a NULL
                // file and failing later with a "programming error".
                throw(std::runtime_error("Bam2FastQ: failed to open output file " + fileName));
            }
            myOutFastqs[rgFastqExt] = filePtr;

            if(fileNameExt != mySecondFileNameExt)
            {
                // first end.
                const char* sm = mySamHeader.getRGTagValue("SM", rg.c_str());
                if(strcmp(sm, "") == 0){sm = myOutBase.c_str();}

                rgListStr.clear();
                SamHeaderRG* rgPtr = mySamHeader.getRG(rg.c_str());
                if((rgPtr == NULL) || (!rgPtr->appendString(rgListStr)))
                {
                    // No RG info for this record.
                    rgListStr = ".\n";
                }
                // The second file name is only filled in when this file
                // has the first-end extension; otherwise it stays ".".
                fq2 = ".";
                if(fileNameExt == myFirstFileNameExt)
                {
                    fq2 = myOutBase.c_str();
                    if(rg != ".")
                    {
                        fq2 += '.';
                        fq2 += rg;
                    }
                    fq2 += mySecondFileNameExt;
                }
                ifprintf(myFqList, "%s\t%s\t%s\t%s",
                         sm, fileName.c_str(), fq2.c_str(),
                         rgListStr.c_str());
            }
        }
        else
        {
            filePtr = it->second;
        }
    }
    if(filePtr == NULL)
    {
        throw(std::runtime_error("Programming ERROR/EXITING: Bam2FastQ filePtr not set."));
    }

    flag = samRec.getFlag();
    const char* readName = samRec.getReadName();
    sequence = samRec.getSequence();
    if(myQField.IsEmpty())
    {
        // Read the quality from the quality field
        quality = samRec.getQuality();
    }
    else
    {
        // Read Quality from the specified tag
        const String* qTagPtr = samRec.getStringTag(myQField.c_str());
        if((qTagPtr != NULL) && (qTagPtr->Length() == (int)sequence.length()))
        {
            // Use the tag value for quality
            quality = qTagPtr->c_str();
        }
        else
        {
            // Tag was not found (or is not the same length as the sequence),
            // so use the quality field.  Only warn the first time.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                std::cerr << "Bam2FastQ: " << myQField.c_str() 
                          << " tag was not found/invalid, so using the quality field in records without the tag\n";
            }
            quality = samRec.getQuality();
        }
    }
    
    if(SamFlag::isReverse(flag) && myReverseComp)
    {
        // It is reverse, so reverse complement the sequence
        BaseUtilities::reverseComplement(sequence);
        // Reverse the quality.
        quality.Reverse();
    }
    else
    {
        // Ensure it is all capitalized.
        int seqLen = sequence.size();
        for (int i = 0; i < seqLen; i++)
        {
            sequence[i] = (char)toupper(sequence[i]);
        }
    }
    
    if(myRNPlus)
    {
        // Repeat the read name (and extension) on the '+' line.
        ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", readName, readNameExt,
                 sequence.c_str(), readName, readNameExt, quality.c_str());
    }
    else
    {
        ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", readName, readNameExt,
                 sequence.c_str(), quality.c_str());
    }
    // Release the record.
    myPool.releaseRecord(&samRec);
}
Ejemplo n.º 5
0
// Apply the recalibration table to one record, replacing its quality string.
//
// The record is left untouched (and false is returned) when its flag matches
// myIntApplyExcludeFlags or when the quality string used as input is not the
// same length as the read.
//
// The input ("old") quality is taken from the myQField tag when myQField is
// set and the tag exists with the read's length; otherwise from the record's
// quality field.  Bases whose quality is below myMinBaseQual keep their old
// quality character.  When myStoreQualTag is set, the old quality string is
// stored in that tag before the quality is overwritten.
//
// Returns true if the record's quality was rewritten, false if it was skipped.
bool Recab::processReadApplyTable(SamRecord& samRecord)
{
    // Static storage: these persist from one call to the next.
    static BaseData data;
    static std::string readGroup;

    int seqLen = samRecord.getReadLength();

    uint16_t  flag = samRecord.getFlag();

    // Check if the flag contains an exclude.
    if((flag & myIntApplyExcludeFlags) != 0)
    {
        // Do not apply the recalibration table to this read.
        ++myNumApplySkipped;
        return(false);
    }
    ++myNumApplyReads;
   
    readGroup = samRecord.getString("RG").c_str();

    // Look for the read group in the map, assigning it the next id if it
    // has not been seen before.
    // TODO - extra string constructor??
    RgInsertReturn insertRet = 
        myRg2Id.insert(std::pair<std::string, uint16_t>(readGroup, 0));
    if(insertRet.second == true)
    {
        // New element inserted.
        insertRet.first->second = myId2Rg.size();
        myId2Rg.push_back(readGroup);
    }

    data.rgid = insertRet.first->second;

    if(!myQField.IsEmpty())
    {
        // Check if there is an old quality.
        const String* oldQPtr =
            samRecord.getStringTag(myQField.c_str());
        if((oldQPtr != NULL) && (oldQPtr->Length() == seqLen))
        {
            // There is an old quality, so use that.
            myQualityStrings.oldq = oldQPtr->c_str();
        }
        else
        {
            myQualityStrings.oldq = samRecord.getQuality();
        }
    }
    else
    {
        myQualityStrings.oldq = samRecord.getQuality();
    }

    if(myQualityStrings.oldq.length() != (unsigned int)seqLen)
    {
        // NOTE(review): this record has already been counted in
        // myNumApplyReads and is not added to myNumApplySkipped - confirm
        // that this is the intended accounting.
        Logger::gLogger->warning("Quality is not the correct length, so skipping recalibration on that record.");
        return(false);
    }

    myQualityStrings.newq.resize(seqLen);

    ////////////////
    ////// iterate sequence
    ////////////////
    // Cycles always count up from 0; for reverse reads the sequence is
    // walked from its last position back to its first.
    const bool reverse = SamFlag::isReverse(flag);
    int32_t seqPos = 0;
    int seqIncr = 1;
    if(reverse)
    {
        seqPos = seqLen - 1;
        seqIncr = -1;
    }

    // Check which read - this will be the same for all positions, so 
    // do this outside of the smaller loop.
    // Mark as first (0) if it is not paired or if it is the first in the
    // pair, otherwise as second (1).
    if(!SamFlag::isPaired(flag) || SamFlag::isFirstFragment(flag))
    {
        data.read = 0;
    }
    else
    {
        data.read = 1;
    }

    // Set unsetbase for curBase.
    // This will be used for the prebase of cycle 0.
    data.curBase = 'K';

    for (data.cycle = 0; data.cycle < seqLen; data.cycle++, seqPos += seqIncr)
    {
        // Set the preBase to the previous cycle's current base.
        // For cycle 0, curBase was set to a default value.
        data.preBase = data.curBase;

        // Get the current base.
        data.curBase = samRecord.getSequence(seqPos);

        if(reverse)
        {
            // Complement the current base.
            // Index through unsigned char so a negative char value cannot
            // turn into a huge table index.
            data.curBase =
                BaseAsciiMap::base2complement[static_cast<unsigned char>(data.curBase)];
        }

        // Get quality
        data.qual = 
            BaseUtilities::getPhredBaseQuality(myQualityStrings.oldq[seqPos]);

        // skip bases with quality below the minimum set.
        if(data.qual < myMinBaseQual)
        {
            myQualityStrings.newq[seqPos] = myQualityStrings.oldq[seqPos];
            continue;
        }

        // Update quality score, capping it at the configured maximum.
        uint8_t qemp = hasherrormodel.getQemp(data);
        qemp = mySqueeze.getQualCharFromQemp(qemp);
        if(qemp > myMaxBaseQualChar)
        {
            qemp = myMaxBaseQualChar;
        }
        myQualityStrings.newq[seqPos] = qemp;
    }

    if(!myStoreQualTag.IsEmpty())
    {
        // Keep the original quality in the configured tag.
        samRecord.addTag(myStoreQualTag, 'Z', myQualityStrings.oldq.c_str());
    }
    samRecord.setQuality(myQualityStrings.newq.c_str());

    return true;
}
Ejemplo n.º 6
0
// Add one record's bases to the recalibration table (hasherrormodel).
//
// Flag-based counters (mapped/unmapped, secondary, duplicate, QC failure) are
// updated for every record.  The record is then skipped (false is returned
// and myNumBuildSkipped is incremented) when:
//   - its flag matches myIntBuildExcludeFlags,
//   - its mapping quality is 0 or 255,
//   - its position cannot be found in the reference,
//   - the quality string used as input is not the read's length, or
//   - its cigar cannot be obtained.
//
// For a used record, each base that aligns to the reference is added to the
// table unless it is filtered by the dbSNP / adjacency / ambiguous-reference /
// minimum-quality checks in the loop below.
//
// Returns true if the record was used, false if it was skipped.
// Throws std::runtime_error if the reference genome is not set
// (processParams() is also documented below as throwing).
bool Recab::processReadBuildTable(SamRecord& samRecord)
{
    // Static storage: these persist from one call to the next.
    static BaseData data;
    static std::string chromosomeName;
    static std::string readGroup;

    int seqLen = samRecord.getReadLength();
    
    // Check if the parameters have been processed.
    if(!myParamsSetup)
    {
        // This throws an exception if the reference cannot be setup.
        processParams();
    }

    uint16_t  flag = samRecord.getFlag();

    // Count mapped/unmapped.  Nothing is skipped here: whether a record is
    // excluded is decided by the exclude-flag check further down.
    if(!SamFlag::isMapped(flag))
    {
        ++myUnMappedCount;
    }
    else
    {
        // This read is mapped.
        ++myMappedCount;
    }

    if(SamFlag::isSecondary(flag))
    {
        // Secondary read
        ++mySecondaryCount;
    }
    if(SamFlag::isDuplicate(flag))
    {
        ++myDupCount;
    }
    if(SamFlag::isQCFailure(flag))
    {
        ++myQCFailCount;
    }

    // Check if the flag contains an exclude.
    if((flag & myIntBuildExcludeFlags) != 0)
    {
        // Do not use this read for building the recalibration table.
        ++myNumBuildSkipped;
        return(false);
    }

    if(samRecord.getMapQuality() == 0)
    {
        // 0 mapping quality, so skip processing.
        ++myMapQual0Count;
        ++myNumBuildSkipped;
        return(false);
    }
    if(samRecord.getMapQuality() == 255)
    {
        // 255 mapping quality, so skip processing.
        ++myMapQual255Count;
        ++myNumBuildSkipped;
        return(false);
    }
    
    chromosomeName = samRecord.getReferenceName();
    readGroup = samRecord.getString("RG").c_str();

    // Look for the read group in the map, assigning it the next id if it
    // has not been seen before.
    // TODO - extra string constructor??
    RgInsertReturn insertRet = 
        myRg2Id.insert(std::pair<std::string, uint16_t>(readGroup, 0));
    if(insertRet.second == true)
    {
        // New element inserted.
        insertRet.first->second = myId2Rg.size();
        myId2Rg.push_back(readGroup);
    }

    data.rgid = insertRet.first->second;

    // Strand of the read.
    const bool reverse = SamFlag::isReverse(flag);

    if(myReferenceGenome == NULL)
    {
        throw std::runtime_error("Failed to setup Reference File.\n");
    }

    genomeIndex_t mapPos = 
        myReferenceGenome->getGenomePosition(chromosomeName.c_str(), 
                                             samRecord.get1BasedPosition());

    if(mapPos==INVALID_GENOME_INDEX)
    {
        // The position is cast to long so the argument matches the %ld
        // conversion in the format string.
        Logger::gLogger->warning("INVALID_GENOME_INDEX (chrom:pos %s:%ld) and record skipped... Reference in BAM is different from the ref used here!", chromosomeName.c_str(), static_cast<long>(samRecord.get1BasedPosition()));

        ++myNumBuildSkipped;
        return false;
    }

    if(!myQField.IsEmpty())
    {
        // Check if there is an old quality.
        const String* oldQPtr = 
            samRecord.getStringTag(myQField.c_str());
        if((oldQPtr != NULL) && (oldQPtr->Length() == seqLen))
        {
            // There is an old quality, so use that.
            myQualityStrings.oldq = oldQPtr->c_str();
        }
        else
        {
            // Tag was not found (or is not the read's length), so use the
            // current quality.  Only warn the first time.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                Logger::gLogger->warning("Recab: %s tag was not found/invalid, so using the quality field in records without the tag", myQField.c_str());
            }
            myQualityStrings.oldq = samRecord.getQuality();
        }
    }
    else
    {
        myQualityStrings.oldq = samRecord.getQuality();
    }

    if(myQualityStrings.oldq.length() != (unsigned int)seqLen)
    {
        Logger::gLogger->warning("Quality is not the correct length, so skipping recalibration on that record.");
        ++myNumBuildSkipped;
        return(false);
    }

    Cigar* cigarPtr = samRecord.getCigarInfo();

    if(cigarPtr == NULL)
    {
        Logger::gLogger->warning("Failed to get the cigar");
        ++myNumBuildSkipped;
        return(false);
    }

    // This read will be used for building the recab table.
    ++myNumBuildReads;

    ////////////////
    ////// iterate sequence
    ////////////////
    // Cycles always count up from 0; for reverse reads the sequence is
    // walked from its last position back to its first.
    genomeIndex_t refPos = 0;
    int32_t refOffset = 0;
    int32_t prevRefOffset = Cigar::INDEX_NA;
    int32_t seqPos = 0;
    int seqIncr = 1;
    if(reverse)
    {
        seqPos = seqLen - 1;
        seqIncr = -1;
    }

    // read
    if(!SamFlag::isPaired(flag) || SamFlag::isFirstFragment(flag))
        // Mark as first if it is not paired or if it is the
        // first in the pair.
        data.read = 0;
    else
        data.read = 1;

    // Set unsetbase for curBase.
    // This will be used for the prebase of cycle 0.
    data.curBase = 'K';

    for (data.cycle = 0; data.cycle < seqLen; data.cycle++, seqPos += seqIncr)
    {
        // Store the previous current base in preBase.
        data.preBase = data.curBase;

        // Get the current base before checking if we are going to
        // process this position so it will be set for the next position.
        data.curBase = samRecord.getSequence(seqPos);
        if(reverse)
        {
            // Complement the current base.
            // The prebase is already complemented.
            // Index through unsigned char so a negative char value cannot
            // turn into a huge table index.
            data.curBase = 
                BaseAsciiMap::base2complement[static_cast<unsigned char>(data.curBase)];
        }
        
        // Get the reference offset.
        refOffset = cigarPtr->getRefOffset(seqPos);
        if(refOffset == Cigar::INDEX_NA)
        {
            // Not a match/mismatch, so continue to the next one which will
            // not have a previous match/mismatch.
            // Set previous ref offset to a negative so
            // the next one won't be kept.
            prevRefOffset = -2;
            continue;
        }

        // This one is a match.
        refPos = mapPos + refOffset;

        // Check to see if we should process this position.
        // Do not process if it is cycle 0 and:
        //   1) current base is in dbsnp
        if(data.cycle == 0)
        {
            if(!(myDbsnpFile.IsEmpty()) && myDbSNP[refPos])
            {
                // Save the previous reference offset.
                ++myNumDBSnpSkips;
                prevRefOffset = refOffset;
                continue;
            }
        }
        else
        {
            // Do not process if it is not cycle 0 and:
            //   1) previous reference position not adjacent 
            //      (not a match/mismatch)
            //   2) previous base is in dbsnp
            //   3) current base is in dbsnp
            // A preBase of 'K' (the unset value) is also not processed.
            if((!myKeepPrevNonAdjacent && (refOffset != (prevRefOffset + seqIncr))) ||
               (data.preBase == 'K'))
            {
                // Save the previous reference offset.
                prevRefOffset = refOffset;
                continue;
            }
            if(!(myDbsnpFile.IsEmpty()) && 
               (myDbSNP[refPos] ||
                (!myKeepPrevDbsnp && myDbSNP[refPos - seqIncr])))
            {
                ++myNumDBSnpSkips;
                // Save the previous reference offset.
                prevRefOffset = refOffset;
                continue;
            }
        }
        
        // Save the previous reference offset.
        prevRefOffset = refOffset;

        // Set the reference & read bases in the Covariates
        char refBase = (*myReferenceGenome)[refPos];

        if(BaseUtilities::isAmbiguous(refBase))
        {
            // N reference, so skip it when building the table.
            ++myAmbiguous;
            continue;
        }

        if(reverse)
        {
            // Complement the reference base so it is comparable with the
            // (already complemented) read base.
            refBase = BaseAsciiMap::base2complement[static_cast<unsigned char>(refBase)];
        }

        // Get quality char
        data.qual = 
            BaseUtilities::getPhredBaseQuality(myQualityStrings.oldq[seqPos]);

        // skip bases with quality below the minimum set.
        if(data.qual < myMinBaseQual)
        {
            ++mySubMinQual;
            continue;
        }

        // Counted as a match only if the bases are equal and the read base
        // maps to a base2int value below 4.
        if(BaseUtilities::areEqual(refBase, data.curBase)
           && (BaseAsciiMap::base2int[static_cast<unsigned char>(data.curBase)] < 4))
            myBMatchCount++;
        else
            myBMismatchCount++;

        hasherrormodel.setCell(data, refBase);
        myBasecounts++;
    }
    return true;
}
Ejemplo n.º 7
0
// Entry point for the "stats" command.
//
// Parses the command line, opens the input SAM/BAM file (and optionally its
// index, a region list and a dbSNP file), then reads the records and, as
// requested on the command line:
//   --basic            prints the SamFile statistics,
//   --qual / --phred   print a histogram of quality characters / phred values,
//   --pBaseQC/--cBaseQC/--baseSum  run the per-base QC pileup.
//
//   argc/argv - the command line; argv[0] is skipped when parsing parameters.
//
// Returns -1 when --in is missing or both --pBaseQC and --cBaseQC are given,
// the SamFile status when the file or its header cannot be read, and
// otherwise the final SamFile status with NO_MORE_RECS reported as SUCCESS.
int Stats::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String indexFile = "";
    bool basic = false;
    bool noeof = false;
    bool params = false;
    bool qual = false;
    bool phred = false;
    int maxNumReads = -1;
    bool unmapped = false;
    String pBaseQC = "";
    String cBaseQC = "";
    String regionList = "";
    int excludeFlags = 0;
    int requiredFlags = 0;
    bool withinRegion = false;
    int minMapQual = 0;
    String dbsnp = "";
    PosList *dbsnpListPtr = NULL;
    bool baseSum = false;
    int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Types of Statistics")
        LONG_PARAMETER("basic", &basic)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("phred", &phred)
        LONG_STRINGPARAMETER("pBaseQC", &pBaseQC)
        LONG_STRINGPARAMETER("cBaseQC", &cBaseQC)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_INTPARAMETER("maxNumReads", &maxNumReads)
        LONG_PARAMETER("unmapped", &unmapped)
        LONG_STRINGPARAMETER("bamIndex", &indexFile)
        LONG_STRINGPARAMETER("regionList", &regionList)
        LONG_INTPARAMETER("excludeFlags", &excludeFlags)
        LONG_INTPARAMETER("requiredFlags", &requiredFlags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters")
        LONG_PARAMETER("withinRegion", &withinRegion)
        LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters")
        LONG_PARAMETER("baseSum", &baseSum)
        LONG_INTPARAMETER("bufferSize", &bufferSize)
        LONG_INTPARAMETER("minMapQual", &minMapQual)
        LONG_STRINGPARAMETER("dbsnp", &dbsnp)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    inputParameters.Read(argc-1, &(argv[1]));

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument for stats, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Use the index file if unmapped or regionList is not empty.
    bool useIndex = (unmapped|| (!regionList.IsEmpty()));

    // IndexFile is required, so check to see if it has been set.
    if(useIndex && (indexFile == ""))
    {
        // Index file was not specified, so set it to the in file
        // + ".bai"
        indexFile = inFile + ".bai";
    }
    ////////////////////////////////////////
    // Setup in case pileup is used.
    Pileup<PileupElementBaseQCStats> pileup(bufferSize);
    // Initialize start/end positions.
    myStartPos = 0;
    myEndPos = -1;
    
    // Open the output qc file if applicable.
    IFILE baseQCPtr = NULL;
    if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty())
    {
        usage();
        inputParameters.Status();
        // Cannot specify both types of baseQC.
        std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl;
        return(-1);
    }
    else if(!pBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(pBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(true);
    }
    else if(!cBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(cBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(false);
    }

    if(baseQCPtr != NULL)
    {
        PileupElementBaseQCStats::setOutputFile(baseQCPtr);
        PileupElementBaseQCStats::printHeader();
    }
    if((baseQCPtr != NULL) || baseSum)
    {
        PileupElementBaseQCStats::setMapQualFilter(minMapQual);
        PileupElementBaseQCStats::setBaseSum(baseSum);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the file for reading.
    SamFile samIn;
    if(!samIn.OpenForRead(inFile))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    samIn.SetReadFlags(requiredFlags, excludeFlags);

    // Set whether or not basic statistics should be generated.
    samIn.GenerateStatistics(basic);

    // Read the sam header.
    SamFileHeader samHeader;
    if(!samIn.ReadHeader(samHeader))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    // Open the bam index file for reading if we are
    // doing unmapped reads (also set the read section).
    if(useIndex)
    {
        samIn.ReadBamIndex(indexFile);

        if(unmapped)
        {
            samIn.SetReadSection(-1);
        }

        if(!regionList.IsEmpty())
        {
            myRegionList = ifopen(regionList, "r");
        }
    }

    //////////////////////////
    // Read dbsnp if specified and doing baseQC
    if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty()))
    {
        // Read the dbsnp file.
        IFILE fdbSnp;
        fdbSnp = ifopen(dbsnp,"r");
        // Size the position list: one more than the longest reference length.
        const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
        int maxRefLen = 0;
        for(int i = 0; i < refInfo.getNumEntries(); i++)
        {
            int refLen = refInfo.getReferenceLength(i);
            if(refLen >= maxRefLen)
            {
                maxRefLen = refLen + 1;
            }
        }
        
        // NOTE(review): this list is never deleted in this function -
        // confirm whether that is intentional.
        dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen);

        if(fdbSnp==NULL)
        {
            std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n";
        }
        else if(dbsnpListPtr == NULL)
        {
            std::cerr << "Failed to init the memory allocation for the dbsnpList.\n";
        }
        else
        {
            // Read the dbsnp file.
            StringArray tokens;
            String buffer;
            int position = 0;
            int refID = 0;

            // Loop til the end of the file.
            while (!ifeof(fdbSnp))
            {
                // Read the next line.
                buffer.ReadLine(fdbSnp);
                // Skip empty lines and comment lines.
                if (buffer.IsEmpty() || buffer[0] == '#') continue;
                // Start from an empty token list for every line so that
                // tokens from a previously skipped line are never mixed
                // into this one.
                tokens.Clear();
                tokens.AddTokens(buffer);
                // If it does not have at least 2 columns, 
                // continue to the next line.
                if(tokens.Length() < 2) continue;

                if(!tokens[1].AsInteger(position))
                {
                    std::cerr << "Improperly formatted region line, start position "
                              << "(2nd column) is not an integer: "
                              << tokens[1]
                              << "; Skipping to the next line.\n";         
                    continue;
                }

                // Look up the reference name.
                refID = samHeader.getReferenceID(tokens[0]);
                if(refID != SamReferenceInfo::NO_REF_ID)
                {
                    // Reference id was found, so add it to the dbsnp
                    dbsnpListPtr->addPosition(refID, position);
                }
        
                buffer.Clear();
            }
        }
        ifclose(fdbSnp);
    }

    // Read the sam records.
    SamRecord samRecord;

    int numReads = 0;

    //////////////////////
    // Setup in case doing a quality count.
    // Quality histogram, indexed by the quality character's value.
    const int MAX_QUAL = 126;
    const int START_QUAL = 33;
    uint64_t qualCount[MAX_QUAL+1];
    for(int i = 0; i <= MAX_QUAL; i++)
    {
        qualCount[i] = 0;
    }
    
    // Phred histogram, indexed by (quality character - PHRED_DIFF).
    const int START_PHRED = 0;
    const int PHRED_DIFF = START_QUAL - START_PHRED;
    const int MAX_PHRED = MAX_QUAL - PHRED_DIFF;
    uint64_t phredCount[MAX_PHRED+1];
    for(int i = 0; i <= MAX_PHRED; i++)
    {
        phredCount[i] = 0;
    }
    
    int refPos = 0;
    Cigar* cigarPtr = NULL;
    char cigarChar = '?';
    // Exclude clips from the qual/phred counts if unmapped reads are excluded.
    bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED;

    //////////////////////////////////
    // When not reading by sections, getNextSection returns true
    // the first time, then false the next time.
    while(getNextSection(samIn))
    {
        // Keep reading records from the file until SamFile::ReadRecord
        // indicates to stop (returns false) or maxNumReads is reached.
        while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord))
        {
            // Another record was read, so increment the number of reads.
            ++numReads;
            // See if the quality histogram should be generated.
            if(qual || phred)
            {
                // Get the quality.
                // (Named qualStr so it does not hide the --qual flag.)
                const char* qualStr = samRecord.getQuality();
                // Check for no quality ('*').
                if((qualStr[0] == '*') && (qualStr[1] == 0))
                {
                    // This record does not have a quality string, so no 
                    // quality processing is necessary.
                }
                else
                {
                    int index = 0;
                    cigarPtr = samRecord.getCigarInfo();
                    cigarChar = '?';
                    refPos = samRecord.get0BasedPosition();
                    if(!qualExcludeClips && (cigarPtr != NULL))
                    {
                        // Offset the reference position by any soft clips
                        // by subtracting the queryIndex of this start position.
                        // refPos is now the start position of the clips.
                        refPos -= cigarPtr->getQueryIndex(0);
                    }

                    while(qualStr[index] != 0)
                    {
                        // Skip this quality if it is clipped and we are skipping clips.
                        if(cigarPtr != NULL)
                        {
                            cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index);
                        }
                        if(qualExcludeClips && Cigar::isClip(cigarChar))
                        {
                            // Skip a clipped quality.
                            // The reference position is not advanced.
                            ++index;
                            continue;
                        }

                        if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos))
                        {
                            // We have hit the end of the region, stop processing this
                            // quality string.
                            break;
                        }

                        if(withinRegion && (refPos < myStartPos))
                        {
                            // This position is not in the target.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }

                        // Check for valid quality.
                        if((qualStr[index] < START_QUAL) || (qualStr[index] > MAX_QUAL))
                        {
                            // Report in the terms of the requested
                            // histogram(s).
                            if(qual)
                            {
                                std::cerr << "Invalid Quality found: " << qualStr[index] 
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            if(phred)
                            {
                                std::cerr << "Invalid Phred Quality found: " << qualStr[index] - PHRED_DIFF
                                          << ".  Must be between "
                                          << START_PHRED << " and " << MAX_PHRED << ".\n";
                            }
                            // Skip an invalid quality.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }
                        
                        // Increment the count for this quality.
                        ++(qualCount[(int)(qualStr[index])]);
                        ++(phredCount[(int)(qualStr[index]) - PHRED_DIFF]);
                        // Update the position if this is found in the reference or a clip.
                        if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                        {
                            ++refPos;
                        }
                        ++index;
                    }
                }
            }

            // Check the next thing to do for the read.
            if((baseQCPtr != NULL) || baseSum)
            {
                // Pileup the bases for this read.
                pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
            }
        }

        // Done with a section, move on to the next one.

        // New section, so flush the pileup.
        pileup.flushPileup();
    }

    // Finish the base QC: process the record once more, print the summary
    // and close the output file.
    if((baseQCPtr != NULL) || baseSum)
    {
        // Pileup the bases.
        pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
        PileupElementBaseQCStats::printSummary();
        ifclose(baseQCPtr);
    }

    std::cerr << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;

    if(basic)
    {
        std::cerr << std::endl;
        samIn.PrintStatistics();
    }

    // Print the quality stats.
    if(qual)
    {
        std::cerr << std::endl;
        std::cerr << "Quality\tCount\n";
        for(int i = START_QUAL; i <= MAX_QUAL; i++)
        {
            std::cerr << i << "\t" << qualCount[i] << std::endl;
        }
    }
    // Print the phred quality stats.
    if(phred)
    {
        std::cerr << std::endl;
        std::cerr << "Phred\tCount\n";
        for(int i = START_PHRED; i <= MAX_PHRED; i++)
        {
            std::cerr << i << "\t" << phredCount[i] << std::endl;
        }
    }

    SamStatus::Status status = samIn.GetStatus();
    if(status == SamStatus::NO_MORE_RECS)
    {
        // A status of NO_MORE_RECS means that all reads were successful.
        status = SamStatus::SUCCESS;
    }

    return(status);
}
Ejemplo n.º 8
0
// Asserts that samRecord holds the expected contents of test record 1
// (cigar "5M2D", sequence "CCGAA", quality "ABCDE").
//
// The record is checked three ways:
//   1. through the SamRecord accessors,
//   2. through the tag iterator,
//   3. by walking the raw BAM buffer returned by getRecordBuffer().
//
// Any mismatch trips an assert; the function returns normally only when
// every check passes.
void validateRead1ModQuality(SamRecord& samRecord)
{
    //////////////////////////////////////////
    // Validate Record 1
    // Create record structure for validating.
    int expectedBlockSize = 89;
    const char* expectedReferenceName = "1";
    const char* expectedMateReferenceName = "1";
    const char* expectedMateReferenceNameOrEqual = "=";

    // Only the fixed-size header fields of this structure are filled in and
    // compared below; the variable length data is checked against literals.
    bamRecordStruct* expectedRecordPtr =
        (bamRecordStruct *) malloc(expectedBlockSize + sizeof(int));
    // The structure is dereferenced immediately, so fail loudly here rather
    // than crash on a NULL pointer if the allocation failed.
    assert(expectedRecordPtr != NULL);

    char tag[3];
    char type;
    void* value;
    bamRecordStruct* bufferPtr;
    unsigned char* varPtr;

    expectedRecordPtr->myBlockSize = expectedBlockSize;
    expectedRecordPtr->myReferenceID = 0;
    expectedRecordPtr->myPosition = 1010;
    expectedRecordPtr->myReadNameLength = 23;
    expectedRecordPtr->myMapQuality = 0;
    expectedRecordPtr->myBin = 4681;
    expectedRecordPtr->myCigarLength = 2;
    expectedRecordPtr->myFlag = 73;
    expectedRecordPtr->myReadLength = 5;
    expectedRecordPtr->myMateReferenceID = 0;
    expectedRecordPtr->myMatePosition = 1010;
    expectedRecordPtr->myInsertSize = 0;

    // Check the alignment end
    assert(samRecord.get0BasedAlignmentEnd() == 1016);
    assert(samRecord.get1BasedAlignmentEnd() == 1017);
    assert(samRecord.getAlignmentLength() == 7);
    assert(samRecord.get0BasedUnclippedStart() == 1010);
    assert(samRecord.get1BasedUnclippedStart() == 1011);
    assert(samRecord.get0BasedUnclippedEnd() == 1016);
    assert(samRecord.get1BasedUnclippedEnd() == 1017);

    // Check the accessors.
    assert(samRecord.getBlockSize() == expectedRecordPtr->myBlockSize);
    assert(samRecord.getReferenceID() == expectedRecordPtr->myReferenceID);
    assert(strcmp(samRecord.getReferenceName(), expectedReferenceName) == 0);
    assert(samRecord.get1BasedPosition() == expectedRecordPtr->myPosition + 1);
    assert(samRecord.get0BasedPosition() == expectedRecordPtr->myPosition);
    assert(samRecord.getReadNameLength() ==
           expectedRecordPtr->myReadNameLength);
    assert(samRecord.getMapQuality() == expectedRecordPtr->myMapQuality);
    assert(samRecord.getBin() == expectedRecordPtr->myBin);
    assert(samRecord.getCigarLength() == expectedRecordPtr->myCigarLength);
    assert(samRecord.getFlag() == expectedRecordPtr->myFlag);
    assert(samRecord.getReadLength() == expectedRecordPtr->myReadLength);
    assert(samRecord.getMateReferenceID() ==
           expectedRecordPtr->myMateReferenceID);
    assert(strcmp(samRecord.getMateReferenceName(),
                  expectedMateReferenceName) == 0);
    assert(strcmp(samRecord.getMateReferenceNameOrEqual(),
                  expectedMateReferenceNameOrEqual) == 0);
    assert(samRecord.get1BasedMatePosition() ==
           expectedRecordPtr->myMatePosition + 1);
    assert(samRecord.get0BasedMatePosition() ==
           expectedRecordPtr->myMatePosition);
    assert(samRecord.getInsertSize() == expectedRecordPtr->myInsertSize);
    assert(strcmp(samRecord.getReadName(), "1:1011:F:255+17M15D20M") == 0);
    assert(strcmp(samRecord.getCigar(), "5M2D") == 0);
    assert(strcmp(samRecord.getSequence(), "CCGAA") == 0);
    assert(strcmp(samRecord.getQuality(), "ABCDE") == 0);
    assert(samRecord.getNumOverlaps(1010, 1017) == 5);
    assert(samRecord.getNumOverlaps(1010, 1016) == 5);
    assert(samRecord.getNumOverlaps(1012, 1017) == 3);
    assert(samRecord.getNumOverlaps(1015, 1017) == 0);
    assert(samRecord.getNumOverlaps(1017, 1010) == 0);
    assert(samRecord.getNumOverlaps(1013, 1011) == 0);
    assert(samRecord.getNumOverlaps(-1, 1017) == 5);

    // Reset the tag iter, since the tags have already been read.
    samRecord.resetTagIter();

    // Check the tags.
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'A');
    assert(tag[1] == 'M');
    assert(type == 'i');
    assert(*(char*)value == 0);
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'M');
    assert(tag[1] == 'D');
    assert(type == 'Z');
    assert(*(String*)value == "37");
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'N');
    assert(tag[1] == 'M');
    assert(type == 'i');
    assert(*(char*)value == 0);
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'X');
    assert(tag[1] == 'T');
    assert(type == 'A');
    assert(*(char*)value == 'R');
    // No more tags, should return false (and keep returning false).
    assert(samRecord.getNextSamTag(tag, type, &value) == false);
    assert(samRecord.getNextSamTag(tag, type, &value) == false);

    // Get the record ptr.
    bufferPtr = (bamRecordStruct*)samRecord.getRecordBuffer();
    // Validate the buffers match.
    assert(bufferPtr->myBlockSize == expectedRecordPtr->myBlockSize);
    assert(bufferPtr->myReferenceID == expectedRecordPtr->myReferenceID);
    assert(bufferPtr->myPosition == expectedRecordPtr->myPosition);
    assert(bufferPtr->myReadNameLength == expectedRecordPtr->myReadNameLength);
    assert(bufferPtr->myMapQuality == expectedRecordPtr->myMapQuality);
    assert(bufferPtr->myBin == expectedRecordPtr->myBin);
    assert(bufferPtr->myCigarLength == expectedRecordPtr->myCigarLength);
    assert(bufferPtr->myFlag == expectedRecordPtr->myFlag);
    assert(bufferPtr->myReadLength == expectedRecordPtr->myReadLength);
    assert(bufferPtr->myMateReferenceID ==
           expectedRecordPtr->myMateReferenceID);
    assert(bufferPtr->myMatePosition == expectedRecordPtr->myMatePosition);
    assert(bufferPtr->myInsertSize == expectedRecordPtr->myInsertSize);

    // Validate the variable length fields in the buffer.
    // Set the pointer to the start of the variable fields.
    varPtr = (unsigned char*)(&(bufferPtr->myData[0]));

    // Validate the readname.
    for(int i = 0; i < expectedRecordPtr->myReadNameLength; i++)
    {
        assert(*varPtr == samRecord.getReadName()[i]);
        varPtr++;
    }

    // Validate the cigar.
    // The First cigar is 5M which is 5 << 4 | 0 = 80
    assert(*(unsigned int*)varPtr == 80);
    // Increment the varptr the size of an int.
    varPtr += 4;
    // The 2nd cigar is 2D which is 2 << 4 | 2 = 34
    assert(*(unsigned int*)varPtr == 34);
    // Increment the varptr the size of an int.
    varPtr += 4;

    // Validate the sequence.
    // CC = 0x22
    assert(*varPtr == 0x22);
    varPtr++;
    // GA = 0x41
    assert(*varPtr == 0x41);
    varPtr++;
    // A  = 0x10
    assert(*varPtr == 0x10);
    varPtr++;

    // Validate the Quality: the buffer stores each quality character
    // with 33 subtracted.
    for(int i = 0; i < expectedRecordPtr->myReadLength; i++)
    {
        assert(*varPtr == samRecord.getQuality()[i] - 33);
        varPtr++;
    }

    // Validate the tags.
    // AM, stored with type 'C' and value 0.
    assert(*varPtr == 'A');
    varPtr++;
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'C');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    // MD, a 'Z' string "37" followed by its terminating 0.
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'D');
    varPtr++;
    assert(*varPtr == 'Z');
    varPtr++;
    assert(*varPtr == '3');
    varPtr++;
    assert(*varPtr == '7');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    // NM, stored with type 'C' and value 0.
    assert(*varPtr == 'N');
    varPtr++;
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'C');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    // XT, type 'A' with value 'R'.
    assert(*varPtr == 'X');
    varPtr++;
    assert(*varPtr == 'T');
    varPtr++;
    assert(*varPtr == 'A');
    varPtr++;
    assert(*varPtr == 'R');
    varPtr++;

    // Release the expected-record scratch structure so repeated calls
    // do not leak it.
    free(expectedRecordPtr);
}
Ejemplo n.º 9
0
// main function
//
// Masks the ends of every read in a SAM/BAM file and writes the result
// to a new file.  Masked bases become 'N' and, when the record has a
// quality string, the matching qualities become '!'.
//
// Expected arguments:
//   argv[2]  input file name
//   argv[3]  output file name
//   argv[4]  (optional, if it does not start with '-') number of bases to
//            trim from both ends
//   followed by any of: -L/--left, -R/--right, -i/--ignoreStrand,
//   -n/--noeof, --noPhoneHome, --phoneHomeThinning
//
// Unless --ignoreStrand is given, the left/right counts are swapped for
// reads flagged as reverse strand.
//
// Returns 0 on success, -1 on usage errors, failure to open the input,
// a sequence/quality length mismatch or a write failure, and the SamFile
// status when opening the output, reading/writing the header or reading
// a record fails.
int TrimBam::execute(int argc, char ** argv)
{
  SamFile samIn;
  SamFile samOut;
  int numTrimBaseL = 0;
  int numTrimBaseR = 0;
  bool noeof = false;
  bool ignoreStrand = false;
  bool noPhoneHome = false;
  std::string inName = "";
  std::string outName = "";

  if ( argc < 5 ) {
    usage();
    std::cerr << "ERROR: Incorrect number of parameters specified\n";
    return(-1);
  }
  inName = argv[2];
  outName = argv[3];

  static struct option getopt_long_options[] = {
      // Input options
      { "left", required_argument, NULL, 'L'},
      { "right", required_argument, NULL, 'R'},
      { "ignoreStrand", no_argument, NULL, 'i'},
      { "noeof", no_argument, NULL, 'n'},
      { "noPhoneHome", no_argument, NULL, 'p'},
      { "nophonehome", no_argument, NULL, 'P'},
      { "phoneHomeThinning", required_argument, NULL, 't'},
      { "phonehomethinning", required_argument, NULL, 'T'},
      { NULL, 0, NULL, 0 },
  };

  // argc >= 5 was verified above, so argv[4] is valid.
  int argIndex = 4;
  if(argv[argIndex][0] != '-')
  {
      // This is the number of bases to trim off both sides
      // so convert to a number.
      numTrimBaseL = atoi(argv[argIndex]);
      numTrimBaseR = numTrimBaseL;
      ++argIndex;
  }

  int c = 0;
  int n_option_index = 0;
  // Process any additional parameters
  while ( ( c = getopt_long(argc, argv,
                            "L:R:in", getopt_long_options, &n_option_index) )
          != -1 )
  {
      switch(c)
      {
          case 'L':
              numTrimBaseL = atoi(optarg);
              break;
          case 'R':
              numTrimBaseR = atoi(optarg);
              break;
          case 'i':
              ignoreStrand = true;
              break;
          case 'n':
              noeof = true;
              break;
          case 'p':
          case 'P':
              noPhoneHome = true;
              break;
          case 't':
          case 'T':
              PhoneHome::allThinning = atoi(optarg);
              break;
          default:
              // NOTE(review): n_option_index is presumably not updated by
              // getopt_long for an unrecognized option, so the name printed
              // here may be that of an earlier (or the first) long option
              // rather than the offending one -- confirm against the
              // getopt_long documentation.
              fprintf(stderr,"ERROR: Unrecognized option %s\n",
                      getopt_long_options[n_option_index].name);
              return(-1);
      }
  }

  if(!noPhoneHome)
  {
      PhoneHome::checkVersion(getProgramName(), VERSION);
  }

  if(noeof)
  {
      // Set that the eof block is not required.
      BgzfFileType::setRequireEofBlock(false);
  }

  if ( ! samIn.OpenForRead(inName.c_str()) ) {
      fprintf(stderr, "***Problem opening %s\n",inName.c_str());
    return(-1);
  }

  if(!samOut.OpenForWrite(outName.c_str())) {
    fprintf(stderr, "%s\n", samOut.GetStatusMessage());
    return(samOut.GetStatus());
  }

  fprintf(stderr,"Arguments in effect: \n");
  fprintf(stderr,"\tInput file : %s\n",inName.c_str());
  fprintf(stderr,"\tOutput file : %s\n",outName.c_str());
  if(numTrimBaseL == numTrimBaseR)
  {
      fprintf(stderr,"\t#Bases to trim from each side : %d\n", numTrimBaseL);
  }
  else
  {
      fprintf(stderr,"\t#Bases to trim from the left of forward strands : %d\n",
              numTrimBaseL);
      fprintf(stderr,"\t#Bases to trim from the right of forward strands: %d\n",
              numTrimBaseR);
      if(!ignoreStrand)
      {
          // By default, reverse strands are treated the opposite.
          fprintf(stderr,"\t#Bases to trim from the left of reverse strands : %d\n",
                  numTrimBaseR);
          fprintf(stderr,"\t#Bases to trim from the right of reverse strands : %d\n",
                  numTrimBaseL);
      }
      else
      {
          // ignore strand, treating forward & reverse strands the same
          fprintf(stderr,"\t#Bases to trim from the left of reverse strands : %d\n",
                  numTrimBaseL);
          fprintf(stderr,"\t#Bases to trim from the right of reverse strands : %d\n",
                  numTrimBaseR);
      }
  }

   // Read the sam header.
   SamFileHeader samHeader;
   if(!samIn.ReadHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
      return(samIn.GetStatus());
   }

   // Write the sam header.
   if(!samOut.WriteHeader(samHeader))
   {
      fprintf(stderr, "%s\n", samOut.GetStatusMessage());
      return(samOut.GetStatus());
   }

   SamRecord samRecord;
   // Working copies of the sequence and quality strings.  These are
   // std::string rather than fixed-size char arrays so that a very long
   // read cannot overflow the buffer.
   std::string seq;
   std::string qual;

   // Keep reading records until ReadRecord returns false.
   while(samIn.ReadRecord(samHeader, samRecord)) {
     // Successfully read a record from the file, so copy out the fields
     // that will be modified.
     seq = samRecord.getSequence();
     qual = samRecord.getQuality();

     // Number of bases to trim from the left/right,
     // set based on ignoreStrand flag and strand info.
     int trimLeft = numTrimBaseL;
     int trimRight = numTrimBaseR;
     if(!ignoreStrand)
     {
         if(SamFlag::isReverse(samRecord.getFlag()))
         {
             // We are reversing the reverse reads,
             // so swap the left & right trim counts.
             trimRight = numTrimBaseL;
             trimLeft = numTrimBaseR;
         }
     }

     const int len = static_cast<int>(seq.size());
     // Do not trim if sequence is '*'
     if ( seq != "*" ) {
       // A quality of '*' means no qualities are stored, so only the
       // sequence is masked in that case.
       const bool qualValue = (qual != "*");
       const int qualLen = static_cast<int>(qual.size());
       if ( (qualLen != len) && qualValue ) {
         fprintf(stderr,"ERROR: Sequence and Quality have different length\n");
         return(-1);
       }
       if ( len < (trimLeft + trimRight) ) {
         // Read Length is less than the total number of bases to trim,
         // so trim the entire read.
         for(int i = 0; i < len; ++i) {
           seq[i] = 'N';
           if ( qualValue ) {
             qual[i] = '!';
           }
         }
       }
       else
       {
           // Read Length is at least the total number of bases to trim,
           // so trim from the left, then from the right.
           for(int i = 0; i < trimLeft; ++i)
           {
               // Trim the bases from the left.
               seq[i] = 'N';
               if ( qualValue )
               {
                   qual[i] = '!';
               }
           }
           for(int i = 0; i < trimRight; i++)
           {
               // Trim the bases from the right.
               seq[len-i-1] = 'N';
               if(qualValue)
               {
                   qual[len-i-1] = '!';
               }
           }
       }
       samRecord.setSequence(seq.c_str());
       samRecord.setQuality(qual.c_str());
     }

     if(!samOut.WriteRecord(samHeader, samRecord)) {
         // Failed to write a record.
       fprintf(stderr, "Failure in writing record %s\n", samOut.GetStatusMessage());
       return(-1);
     }
   }

   if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
   {
      // Failed to read a record.
      fprintf(stderr, "%s\n", samIn.GetStatusMessage());
   }

   std::cerr << std::endl << "Number of records read = " <<
     samIn.GetCurrentRecordCount() << std::endl;
   std::cerr << "Number of records written = " <<
     samOut.GetCurrentRecordCount() << std::endl;

   if(samIn.GetStatus() != SamStatus::NO_MORE_RECS)
   {
     // Failed reading a record.
     return(samIn.GetStatus());
   }

   // All reads were successful, so close both files and report success.
   samIn.Close();
   samOut.Close();
   return 0;
}
Ejemplo n.º 10
0
// Add an entry to this pileup element.
//
// Records, for the given read at this element's reference position, the
// base, mapping quality, base quality (quality character minus 33),
// strand ('F' or 'R') and cycle in the per-entry arrays, doubling the
// arrays when they are full.
//
// If the read has a deletion at this position, a '-' entry is stored when
// myAddDelAsBase is set; otherwise nothing is stored.
//
// On a memory allocation failure an error is written to stderr and the
// call returns without storing an entry.
//
// Throws std::runtime_error if the record has no cigar information.
void PileupElementBaseQual::addEntry(SamRecord& record)
{
    // Call the base class:
    PileupElement::addEntry(record);

    // Look up the reference allele the first time an entry is added.
    if(myRefAllele.empty())
    {
        genomeIndex_t markerIndex = (*myRefSeq).getGenomePosition(getChromosome(), static_cast<uint32_t>(getRefPosition()+1));
        myRefAllele = (*myRefSeq)[markerIndex];
    }

    // Increment the index
    ++myIndex;

    // if the index has gone beyond the allocated space, double the size.
    // Each array is grown separately; myAllocatedSize is only updated once
    // all of them have been grown successfully.  On any failure the index
    // is rolled back, since no entry is stored for it.
    if(myIndex >= myAllocatedSize)
    {
        char* tempBuffer = (char*)realloc(myBases, myAllocatedSize * 2);
        if(tempBuffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myBases = tempBuffer;
        int8_t* tempInt8Buffer = (int8_t*)realloc(myMapQualities, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myMapQualities = tempInt8Buffer;
        tempInt8Buffer = (int8_t*)realloc(myQualities, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myQualities = tempInt8Buffer;
        tempBuffer = (char*)realloc(myStrands, myAllocatedSize * 2);
        if(tempBuffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myStrands = tempBuffer;
        tempInt8Buffer = (int8_t*)realloc(myCycles, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myCycles = tempInt8Buffer;
        int16_t* tempInt16Buffer = (int16_t*)realloc(myGLScores, myAllocatedSize * 2 * sizeof(int16_t));
        // Check the buffer that was just reallocated (the 16-bit one), so
        // that a failed allocation is never stored into myGLScores.
        if(tempInt16Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            --myIndex;
            return;
        }
        myGLScores = tempInt16Buffer;
        myAllocatedSize = myAllocatedSize * 2;
    }

    Cigar* cigar = record.getCigarInfo();

    if(cigar == NULL)
    {
        throw std::runtime_error("Failed to retrieve cigar info from the record.");
    }

    // Map this element's reference position to an index into the read.
    int32_t readIndex =
        cigar->getQueryIndex(getRefPosition(), record.get0BasedPosition());

    // If the readPosition is N/A, this is a deletion.
    if(readIndex != CigarRoller::INDEX_NA)
    {
        char base = record.getSequence(readIndex);
        int8_t mapQual = record.getMapQuality();
        //-33 to obtain the PHRED base quality
        char qual = record.getQuality(readIndex) - 33;
        if(qual == UNSET_QUAL)
        {
            qual = ' ';
        }
        // Flag bit 0x0010 set means the read is on the reverse strand.
        char strand = (record.getFlag() & 0x0010) ? 'R' : 'F';
        // Cycle is 1-based from the start of the read for forward reads and
        // from the end of the read for reverse reads.
        // NOTE(review): myCycles appears to hold int8_t values, so a cycle
        // above 127 would not fit -- confirm the intended read length range.
        int cycle = strand == 'F' ? readIndex + 1 : record.getReadLength() -  readIndex;
        myBases[myIndex] = base;
        myMapQualities[myIndex] = mapQual;
        myQualities[myIndex] = qual;
        myStrands[myIndex] = strand;
        myCycles[myIndex] = cycle;
    }
    else if(myAddDelAsBase)
    {
        // Store the deletion as a '-' base with no quality and no cycle.
        int8_t mapQual = record.getMapQuality();
        char strand = (record.getFlag() & 0x0010) ? 'R' : 'F';
        myBases[myIndex] = '-';
        myMapQualities[myIndex] = mapQual;
        myQualities[myIndex] = -1;
        myStrands[myIndex] = strand;
        myCycles[myIndex] = -1;
    }
    else
    {
        // Do not add a deletion.
        // Did not add any entries, so decrement the index counter since the
        // index was not used.
        --myIndex;
    }
}