void testAddHeaderAndTagToFile(const char* inputName, const char* outputName) { SamFile inSam, outSam; assert(inSam.OpenForRead(inputName)); assert(outSam.OpenForWrite(outputName)); // Read the SAM Header. SamFileHeader samHeader; assert(inSam.ReadHeader(samHeader)); // Add a header line. assert(samHeader.addHeaderLine("@RG\tID:myID\tSM:mySM") == false); assert(samHeader.addHeaderLine("@RG\tID:myID3\tSM:mySM") == true); // Write Header assert(outSam.WriteHeader(samHeader)); SamRecord samRecord; assert(inSam.ReadRecord(samHeader, samRecord)); // validateRead1(samRecord); // Add two tags. assert(samRecord.addIntTag("XA", 123)); assert(samRecord.addIntTag("XA", 456)); assert(samRecord.addTag("RR", 'Z', "myID1")); assert(samRecord.addTag("RR", 'Z', "myID2")); // Write as Sam. assert(outSam.WriteRecord(samHeader, samRecord)); // TODO, add test to verify it was written correctly. // Read a couple of records to make sure it properly can read them even // if they are bigger than the original. assert(inSam.ReadRecord(samHeader, samRecord)); assert(inSam.ReadRecord(samHeader, samRecord)); // Check the MD tag, which requires the reference. GenomeSequence reference("testFiles/chr1_partial.fa"); assert(SamTags::isMDTagCorrect(samRecord, reference) == false); String newMDTag; SamTags::createMDTag(newMDTag, samRecord, reference); assert(newMDTag == "2T1N0"); assert(SamTags::updateMDTag(samRecord, reference)); // Write as Sam. assert(outSam.WriteRecord(samHeader, samRecord)); }
// build the read group library map void Dedup_LowMem::buildReadGroupLibraryMap(SamFileHeader& header) { rgidLibMap.clear(); numLibraries = 0; std::map<std::string,uint32_t> libNameMap; SamHeaderRecord * headerRecord = header.getNextRGRecord(); while(headerRecord != NULL) { std::string ID = headerRecord->getTagValue("ID"); std::string LB = headerRecord->getTagValue("LB"); if ( ID.empty() ) { std::string headerRecordString; headerRecord->appendString(headerRecordString); Logger::gLogger->error("Cannot find readGroup ID information in the header line %s", headerRecordString.c_str()); } if ( rgidLibMap.find(ID) != rgidLibMap.end() ) { Logger::gLogger->error("The readGroup ID %s is not a unique identifier",ID.c_str()); } if ( LB.empty() ) { std::string headerRecordString; headerRecord->appendString(headerRecordString); Logger::gLogger->warning("Cannot find library information in the header line %s. Using empty string for library name", headerRecordString.c_str()); } if ( libNameMap.find( LB ) != libNameMap.end() ) { rgidLibMap[ID] = libNameMap[LB]; } else { numLibraries = libNameMap.size()+1; libNameMap[LB] = numLibraries; rgidLibMap[ID] = numLibraries; } headerRecord = header.getNextRGRecord(); } if (numLibraries > 0xff) { Logger::gLogger->error("More than 255 library names are identified. Dedup_LowMem currently only allows up to 255 library names"); } }
// Build a second header from the records and comments of samHeader and
// run validateHeader on the copy.
void testCopyHeader(SamFileHeader& samHeader)
{
    SamFileHeader copiedHeader;

    // Duplicate every header record, in iteration order.
    for(SamHeaderRecord* rec = samHeader.getNextHeaderRecord();
        rec != NULL;
        rec = samHeader.getNextHeaderRecord())
    {
        copiedHeader.addRecordCopy(*rec);
    }

    // Duplicate the comments; EMPTY_RETURN marks the end of the comments.
    for(std::string comment = samHeader.getNextComment();
        comment != SamFileHeader::EMPTY_RETURN;
        comment = samHeader.getNextComment())
    {
        copiedHeader.addComment(comment.c_str());
    }

    // The copy must pass the same validation as the original.
    validateHeader(copiedHeader);
}
// Translate the sort order string stored in the header into a SortedType.
// "queryname" maps to QUERY_NAME and "coordinate" maps to COORDINATE;
// anything else (including no sort order in the header) yields UNSORTED.
SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
{
    const char* sortOrder = header.getSortOrder();

    if(strcmp(sortOrder, "queryname") == 0)
    {
        return(QUERY_NAME);
    }
    if(strcmp(sortOrder, "coordinate") == 0)
    {
        return(COORDINATE);
    }

    // Not specified (or not recognized), so unsorted is the value to use.
    return(UNSORTED);
}
// Get the number of unmapped reads in the specified reference id. // Returns -1 for out of range refIDs. int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName, SamFileHeader& header) { // The bam index must have already been read. if(myBamIndex == NULL) { myStatus.setStatus(SamStatus::FAIL_ORDER, "Cannot get num unmapped reads from the index until it has been read."); return(false); } int32_t refID = BamIndex::REF_ID_UNMAPPED; if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0)) { // Reference name specified, so read just the "-1" entries. refID = header.getReferenceID(refName); } return(myBamIndex->getNumUnMappedReads(refID)); }
// Prepare to read a newly selected section of an indexed BAM file.
//
// Clears the "new section" flag, verifies the preconditions, resolves a
// pending reference name to a reference id, and loads the index chunks for
// the region into myChunksToRead.
//
// Throws std::runtime_error (after setting myStatus to FAIL_ORDER) if the
// BAM index has not been read, no BAM file is open for reading, or the
// header has not been read.
// Returns false with status NO_MORE_RECS if the requested reference name is
// not found in the header; otherwise returns true.
bool SamFile::processNewSection(SamFileHeader &header)
{
    myNewSection = false;

    // Reading by section requires the BAM index.
    if(myBamIndex == NULL)
    {
        // No bam index has been read.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Cannot read section since there is no index file open");
        throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
    }

    // Reading by section requires a BAM file open for reading.
    if(!myIsBamOpenForRead)
    {
        // There is not a BAM file open for reading.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Cannot read section since there is no bam file open");
        throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
    }

    if(myHasHeader == false)
    {
        // The header has not yet been read.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Cannot read record since the header has not been read.");
        throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
    }

    // Indexed Bam open for read, so disable read buffering because iftell
    // will be used.
    // Needs to be done here after we already know that the header has been
    // read.
    myFilePtr->disableBuffering();

    myChunksToRead.clear();
    // Reset the end of the current chunk.  We are resetting our read, so
    // we no longer have a "current chunk" that we are reading.
    myCurrentChunkEnd = 0;

    // Check to see if the read section was set based on the reference name
    // but not yet converted to reference id.
    if(!myRefName.empty())
    {
        myRefID = header.getReferenceID(myRefName.c_str());
        // Clear myRefName so this conversion is only executed once.
        myRefName.clear();

        // Check to see if a reference id was found.
        if(myRefID == SamReferenceInfo::NO_REF_ID)
        {
            myStatus = SamStatus::NO_MORE_RECS;
            return(false);
        }
    }

    // Get the chunks associated with this reference region.
    if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
                                      myChunksToRead) == true)
    {
        myStatus = SamStatus::SUCCESS;
    }
    else
    {
        String errorMsg = "Failed to get the specified region, refID = ";
        errorMsg += myRefID;
        errorMsg += "; startPos = ";
        errorMsg += myStartPos;
        errorMsg += "; endPos = ";
        errorMsg += myEndPos;
        myStatus.setStatus(SamStatus::FAIL_PARSE, errorMsg);
    }

    // NOTE(review): true is returned even when the chunk lookup failed and
    // the status was set to FAIL_PARSE - confirm callers rely on this.
    return(true);
}
// Validate that the record is sorted compared to the previously read record // if there is one, according to the specified sort order. // If the sort order is UNSORTED, true is returned. bool SamFile::validateSortOrder(SamRecord& record, SamFileHeader& header) { if(myRefPtr != NULL) { record.setReference(myRefPtr); } record.setSequenceTranslation(myReadTranslation); bool status = false; if(mySortedType == UNSORTED) { // Unsorted, so nothing to validate, just return true. status = true; } else { // Check to see if mySortedType is based on the header. if(mySortedType == FLAG) { // Determine the sorted type from what was read out of the header. mySortedType = getSortOrderFromHeader(header); } if(mySortedType == QUERY_NAME) { // Validate that it is sorted by query name. // Get the query name from the record. const char* readName = record.getReadName(); // Check if it is sorted either in samtools way or picard's way. if((myPrevReadName.Compare(readName) > 0) && (strcmp(myPrevReadName.c_str(), readName) > 0)) { // return false. String errorMessage = "ERROR: File is not sorted by read name at record "; errorMessage += myRecordCount; errorMessage += "\n\tPrevious record was "; errorMessage += myPrevReadName; errorMessage += ", but this record is "; errorMessage += readName; myStatus.setStatus(SamStatus::INVALID_SORT, errorMessage.c_str()); status = false; } else { myPrevReadName = readName; status = true; } } else { // Validate that it is sorted by COORDINATES. // Get the leftmost coordinate and the reference index. int32_t refID = record.getReferenceID(); int32_t coord = record.get0BasedPosition(); // The unmapped reference id is at the end of a sorted file. if(refID == BamIndex::REF_ID_UNMAPPED) { // A new reference ID that is for the unmapped reads // is always valid. 
status = true; myPrevRefID = refID; myPrevCoord = coord; } else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED) { // Previous reference ID was for unmapped reads, but the // current one is not, so this is not sorted. String errorMessage = "ERROR: File is not coordinate sorted at record "; errorMessage += myRecordCount; errorMessage += "\n\tPrevious record was unmapped, but this record is "; errorMessage += header.getReferenceLabel(refID) + ":" + coord; myStatus.setStatus(SamStatus::INVALID_SORT, errorMessage.c_str()); status = false; } else if(refID < myPrevRefID) { // Current reference id is less than the previous one, //meaning that it is not sorted. String errorMessage = "ERROR: File is not coordinate sorted at record "; errorMessage += myRecordCount; errorMessage += "\n\tPrevious record was "; errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord; errorMessage += ", but this record is "; errorMessage += header.getReferenceLabel(refID) + ":" + coord; myStatus.setStatus(SamStatus::INVALID_SORT, errorMessage.c_str()); status = false; } else { // The reference IDs are in the correct order. if(refID > myPrevRefID) { // New reference id, so set the previous coordinate to -1 myPrevCoord = -1; } // Check the coordinates. if(coord < myPrevCoord) { // New Coord is less than the previous position. String errorMessage = "ERROR: File is not coordinate sorted at record "; errorMessage += myRecordCount; errorMessage += "\n\tPreviousRecord was "; errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord; errorMessage += ", but this record is "; errorMessage += header.getReferenceLabel(refID) + ":" + coord; myStatus.setStatus(SamStatus::INVALID_SORT, errorMessage.c_str()); status = false; } else { myPrevRefID = refID; myPrevCoord = coord; status = true; } } } } return(status); }
int Bam2FastQ::execute(int argc, char **argv) { // Extract command line arguments. String inFile = ""; bool readName = false; String refFile = ""; String outBase = ""; String firstOut = ""; String secondOut = ""; String unpairedOut = ""; bool interleave = false; bool noeof = false; bool params = false; myNumMateFailures = 0; myNumPairs = 0; myNumUnpaired = 0; myReverseComp = true; myRNPlus = false; myFirstRNExt = DEFAULT_FIRST_EXT; mySecondRNExt = DEFAULT_SECOND_EXT; ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_PARAMETER_GROUP("Required Parameters") LONG_STRINGPARAMETER("in", &inFile) LONG_PARAMETER_GROUP("Optional Parameters") LONG_PARAMETER("readName", &readName) LONG_PARAMETER("merge", &interleave) LONG_STRINGPARAMETER("refFile", &refFile) LONG_STRINGPARAMETER("firstRNExt", &myFirstRNExt) LONG_STRINGPARAMETER("secondRNExt", &mySecondRNExt) LONG_PARAMETER("rnPlus", &myRNPlus) LONG_PARAMETER("noReverseComp", &myReverseComp) LONG_PARAMETER("noeof", &noeof) LONG_PARAMETER("params", ¶ms) LONG_PARAMETER_GROUP("Optional OutputFile Names") LONG_STRINGPARAMETER("outBase", &outBase) LONG_STRINGPARAMETER("firstOut", &firstOut) LONG_STRINGPARAMETER("secondOut", &secondOut) LONG_STRINGPARAMETER("unpairedOut", &unpairedOut) LONG_PHONEHOME(VERSION) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); // parameters start at index 2 rather than 1. inputParameters.Read(argc, argv, 2); // If no eof block is required for a bgzf file, set the bgzf file type to // not look for it. if(noeof) { // Set that the eof block is not required. BgzfFileType::setRequireEofBlock(false); } // Check to see if the in file was specified, if not, report an error. if(inFile == "") { usage(); inputParameters.Status(); // In file was not specified but it is mandatory. 
std::cerr << "--in is a mandatory argument, " << "but was not specified" << std::endl; return(-1); } // Cannot specify both interleaved & secondOut since secondOut would be N/A. if(interleave && !secondOut.IsEmpty()) { usage(); inputParameters.Status(); std::cerr << "ERROR: Cannot specify --merge & --secondOut.\n"; return(-1); } // Check to see if the out file was specified, if not, generate it from // the input filename. if(outBase == "") { // Just remove the extension from the input filename. int extStart = inFile.FastFindLastChar('.'); if(extStart <= 0) { outBase = inFile; } else { outBase = inFile.Left(extStart); } } // Check to see if the first/second/single-ended were specified and // if not, set them. std::string firstExt = "_1.fastq"; if(interleave) { firstExt = "_interleaved.fastq"; } getFileName(firstOut, outBase, firstExt.c_str()); getFileName(secondOut, outBase, "_2.fastq"); getFileName(unpairedOut, outBase, ".fastq"); if(params) { inputParameters.Status(); } // Open the files for reading/writing. // Open prior to opening the output files, // so if there is an error, the outputs don't get created. SamFile samIn; SamFileHeader samHeader; samIn.OpenForRead(inFile, &samHeader); // Open the output files. myUnpairedFile = ifopen(unpairedOut, "w"); // Only open the first file if it is different than an already opened file. if(firstOut != unpairedOut) { myFirstFile = ifopen(firstOut, "w"); } else { myFirstFile = myUnpairedFile; } // If it is interleaved or the 2nd file is not a new name, set it appropriately. 
if(interleave || secondOut == firstOut) { mySecondFile = myFirstFile; } else if(secondOut == unpairedOut) { mySecondFile = myUnpairedFile; } else { mySecondFile = ifopen(secondOut, "w"); } if(myUnpairedFile == NULL) { std::cerr << "Failed to open " << unpairedOut << " so can't convert bam2FastQ.\n"; return(-1); } if(myFirstFile == NULL) { std::cerr << "Failed to open " << firstOut << " so can't convert bam2FastQ.\n"; return(-1); } if(mySecondFile == NULL) { std::cerr << "Failed to open " << secondOut << " so can't convert bam2FastQ.\n"; return(-1); } if((readName) || (strcmp(samHeader.getSortOrder(), "queryname") == 0)) { readName = true; } else { // defaulting to coordinate sorted. samIn.setSortedValidation(SamFile::COORDINATE); } // Setup the '=' translation if the reference was specified. if(!refFile.IsEmpty()) { GenomeSequence* refPtr = new GenomeSequence(refFile); samIn.SetReadSequenceTranslation(SamRecord::BASES); samIn.SetReference(refPtr); } SamRecord* recordPtr; int16_t samFlag; SamStatus::Status returnStatus = SamStatus::SUCCESS; while(returnStatus == SamStatus::SUCCESS) { recordPtr = myPool.getRecord(); if(recordPtr == NULL) { // Failed to allocate a new record. throw(std::runtime_error("Failed to allocate a new SAM/BAM record")); } if(!samIn.ReadRecord(samHeader, *recordPtr)) { // Failed to read a record. returnStatus = samIn.GetStatus(); continue; } // Have a record. Check to see if it is a pair or unpaired read. 
samFlag = recordPtr->getFlag(); if(SamFlag::isPaired(samFlag)) { if(readName) { handlePairedRN(*recordPtr); } else { handlePairedCoord(*recordPtr); } } else { ++myNumUnpaired; writeFastQ(*recordPtr, myUnpairedFile); } } // Flush All cleanUpMateMap(0, true); if(returnStatus == SamStatus::NO_MORE_RECS) { returnStatus = SamStatus::SUCCESS; } samIn.Close(); closeFiles(); // Output the results std::cerr << "\nFound " << myNumPairs << " read pairs.\n"; std::cerr << "Found " << myNumUnpaired << " unpaired reads.\n"; if(myNumMateFailures != 0) { std::cerr << "Failed to find mates for " << myNumMateFailures << " reads, so they were written as unpaired\n" << " (not included in either of the above counts).\n"; } return(returnStatus); }
// Read a BAM file's header. bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header, SamStatus& status) { if(filePtr == NULL) { // File is not open, return false. status.setStatus(SamStatus::FAIL_ORDER, "Cannot read header since the file pointer is null"); return(false); } if(filePtr->isOpen() == false) { status.setStatus(SamStatus::FAIL_ORDER, "Cannot read header since the file is not open"); return(false); } // Clear the passed in header. header.resetHeader(); int32_t headerLength; int readSize = ifread(filePtr, &headerLength, sizeof(headerLength)); if(readSize != sizeof(headerLength)) { String errMsg = "Failed to read the BAM header length, read "; errMsg += readSize; errMsg += " bytes instead of "; errMsg += (unsigned int)sizeof(headerLength); status.setStatus(SamStatus::FAIL_IO, errMsg.c_str()); return(false); } String headerStr; if(headerLength > 0) { // Read the header. readSize = ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength); headerStr[headerLength] = 0; headerStr.UnlockBuffer(); if(readSize != headerLength) { // Failed to read the header. status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header."); return(false); } } // Parse the header that was read. if(!header.addHeader(headerStr)) { // Status is set in the method on failure. status.setStatus(SamStatus::FAIL_PARSE, header.getErrorMessage()); return(false); } int referenceCount; // Read the number of references sequences. ifread(filePtr, &referenceCount, sizeof(int)); // Get and clear the reference info so it can be set // from the bam reference table. SamReferenceInfo& refInfo = header.getReferenceInfoForBamInterface(); refInfo.clear(); CharBuffer refName; // Read each reference sequence for (int i = 0; i < referenceCount; i++) { int nameLength; int rc; // Read the length of the reference name. 
rc = ifread(filePtr, &nameLength, sizeof(int)); if(rc != sizeof(int)) { status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM reference dictionary."); return(false); } // Read the name. refName.readFromFile(filePtr, nameLength); // Read the length of the reference sequence. int32_t refLen; rc = ifread(filePtr, &refLen, sizeof(int)); if(rc != sizeof(int)) { status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM reference dictionary."); return(false); } refInfo.add(refName.c_str(), refLen); } // Successfully read the file. return(true); }
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader, bool ignorePI) { noRgPgString.clear(); // strings for comparing if two RGs with same ID are the same. static std::string prevString = ""; static std::string newString = ""; SamHeaderRecord* rec = header.getNextHeaderRecord(); while(rec != NULL) { if(rec->getType() == SamHeaderRecord::RG) { if(newHeader != NULL) { // This is an RG line. // First check if this RG is already included in the new header. SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID")); if(prevRG != NULL) { // This RG already exists, check that they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevRG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { if(!ignorePI) { Logger::gLogger->error("Failed to add readgroup to " "header, duplicate, but " "non-identical RG ID, %s\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { // Check for a PI string. size_t prevPIStart = prevString.find("PI:"); size_t newPIStart = newString.find("PI:"); // If they are both npos, then PI was not found // so fail. if((prevPIStart == std::string::npos) && (newPIStart == std::string::npos)) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup" " to header, duplicate," " but non-identical RG" " ID, %s\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { // PI found in one or both strings. size_t prevPIEnd; size_t newPIEnd; if(prevPIStart == std::string::npos) { // new string has PI, so compare to the start of that. prevPIStart = newPIStart; prevPIEnd = newPIStart; } else { prevPIEnd = prevString.find('\t', prevPIStart) + 1; } if(newPIStart == std::string::npos) { // new string has PI, so compare to the start of that. 
newPIStart = prevPIStart; newPIEnd = newPIStart; } else { newPIEnd = newString.find('\t', newPIStart) + 1; } // Compare before PI. if((newString.compare(0, newPIStart, prevString, 0, prevPIStart) != 0) || (newString.compare(newPIEnd, std::string::npos, prevString, prevPIEnd, std::string::npos) != 0)) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup to header, " "duplicate, but non-identical RG ID, %s, " "even when ignoring PI\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { Logger::gLogger->warning("Warning: ignoring non-identical PI field " "for RG ID, %s", rec->getTagValue("ID")); } } } } } else { // This RG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec))) { // Failed to add the RG, exit. Logger::gLogger->error("Failed to add readgroup to header, %s", newHeader->getErrorMessage()); } } } } else if(rec->getType() == SamHeaderRecord::PG) { if(newHeader != NULL) { // This is a PG line. // First check if this PG is already included in the new header. SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID")); if(prevPG != NULL) { // This PG already exists, check if they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevPG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, ignore for now. // TODO: change the ID, and add it. Logger::gLogger->warning("Warning: dropping duplicate, " "but non-identical PG ID, %s", rec->getTagValue("ID")); } } else { // This PG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec))) { // Failed to add the PG, exit. 
Logger::gLogger->error("Failed to add PG to header, %s", newHeader->getErrorMessage()); } } } } else { rec->appendString(noRgPgString); } rec = header.getNextHeaderRecord(); } // Append the comments. header.appendCommentLines(noRgPgString); }
int Stats::execute(int argc, char **argv) { // Extract command line arguments. String inFile = ""; String indexFile = ""; bool basic = false; bool noeof = false; bool params = false; bool qual = false; bool phred = false; int maxNumReads = -1; bool unmapped = false; String pBaseQC = ""; String cBaseQC = ""; String regionList = ""; int excludeFlags = 0; int requiredFlags = 0; bool withinRegion = false; int minMapQual = 0; String dbsnp = ""; PosList *dbsnpListPtr = NULL; bool baseSum = false; int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE; ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_PARAMETER_GROUP("Required Parameters") LONG_STRINGPARAMETER("in", &inFile) LONG_PARAMETER_GROUP("Types of Statistics") LONG_PARAMETER("basic", &basic) LONG_PARAMETER("qual", &qual) LONG_PARAMETER("phred", &phred) LONG_STRINGPARAMETER("pBaseQC", &pBaseQC) LONG_STRINGPARAMETER("cBaseQC", &cBaseQC) LONG_PARAMETER_GROUP("Optional Parameters") LONG_INTPARAMETER("maxNumReads", &maxNumReads) LONG_PARAMETER("unmapped", &unmapped) LONG_STRINGPARAMETER("bamIndex", &indexFile) LONG_STRINGPARAMETER("regionList", ®ionList) LONG_INTPARAMETER("excludeFlags", &excludeFlags) LONG_INTPARAMETER("requiredFlags", &requiredFlags) LONG_PARAMETER("noeof", &noeof) LONG_PARAMETER("params", ¶ms) LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters") LONG_PARAMETER("withinRegion", &withinRegion) LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters") LONG_PARAMETER("baseSum", &baseSum) LONG_INTPARAMETER("bufferSize", &bufferSize) LONG_INTPARAMETER("minMapQual", &minMapQual) LONG_STRINGPARAMETER("dbsnp", &dbsnp) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); inputParameters.Read(argc-1, &(argv[1])); // If no eof block is required for a bgzf file, set the bgzf file type to // not look for it. if(noeof) { // Set that the eof block is not required. 
BgzfFileType::setRequireEofBlock(false); } // Check to see if the in file was specified, if not, report an error. if(inFile == "") { usage(); inputParameters.Status(); // In file was not specified but it is mandatory. std::cerr << "--in is a mandatory argument for stats, " << "but was not specified" << std::endl; return(-1); } // Use the index file if unmapped or regionList is not empty. bool useIndex = (unmapped|| (!regionList.IsEmpty())); // IndexFile is required, so check to see if it has been set. if(useIndex && (indexFile == "")) { // In file was not specified, so set it to the in file // + ".bai" indexFile = inFile + ".bai"; } //////////////////////////////////////// // Setup in case pileup is used. Pileup<PileupElementBaseQCStats> pileup(bufferSize); // Initialize start/end positions. myStartPos = 0; myEndPos = -1; // Open the output qc file if applicable. IFILE baseQCPtr = NULL; if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty()) { usage(); inputParameters.Status(); // Cannot specify both types of baseQC. std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl; return(-1); } else if(!pBaseQC.IsEmpty()) { baseQCPtr = ifopen(pBaseQC, "w"); PileupElementBaseQCStats::setPercentStats(true); } else if(!cBaseQC.IsEmpty()) { baseQCPtr = ifopen(cBaseQC, "w"); PileupElementBaseQCStats::setPercentStats(false); } if(baseQCPtr != NULL) { PileupElementBaseQCStats::setOutputFile(baseQCPtr); PileupElementBaseQCStats::printHeader(); } if((baseQCPtr != NULL) || baseSum) { PileupElementBaseQCStats::setMapQualFilter(minMapQual); PileupElementBaseQCStats::setBaseSum(baseSum); } if(params) { inputParameters.Status(); } // Open the file for reading. SamFile samIn; if(!samIn.OpenForRead(inFile)) { fprintf(stderr, "%s\n", samIn.GetStatusMessage()); return(samIn.GetStatus()); } samIn.SetReadFlags(requiredFlags, excludeFlags); // Set whether or not basic statistics should be generated. samIn.GenerateStatistics(basic); // Read the sam header. 
SamFileHeader samHeader; if(!samIn.ReadHeader(samHeader)) { fprintf(stderr, "%s\n", samIn.GetStatusMessage()); return(samIn.GetStatus()); } // Open the bam index file for reading if we are // doing unmapped reads (also set the read section). if(useIndex) { samIn.ReadBamIndex(indexFile); if(unmapped) { samIn.SetReadSection(-1); } if(!regionList.IsEmpty()) { myRegionList = ifopen(regionList, "r"); } } ////////////////////////// // Read dbsnp if specified and doing baseQC if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty())) { // Read the dbsnp file. IFILE fdbSnp; fdbSnp = ifopen(dbsnp,"r"); // Determine how many entries. const SamReferenceInfo& refInfo = samHeader.getReferenceInfo(); int maxRefLen = 0; for(int i = 0; i < refInfo.getNumEntries(); i++) { int refLen = refInfo.getReferenceLength(i); if(refLen >= maxRefLen) { maxRefLen = refLen + 1; } } dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen); if(fdbSnp==NULL) { std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n"; } else if(dbsnpListPtr == NULL) { std::cerr << "Failed to init the memory allocation for the dbsnpList.\n"; } else { // Read the dbsnp file. StringArray tokens; String buffer; int position = 0; int refID = 0; // Loop til the end of the file. while (!ifeof(fdbSnp)) { // Read the next line. buffer.ReadLine(fdbSnp); // If it does not have at least 2 columns, // continue to the next line. if (buffer.IsEmpty() || buffer[0] == '#') continue; tokens.AddTokens(buffer); if(tokens.Length() < 2) continue; if(!tokens[1].AsInteger(position)) { std::cerr << "Improperly formatted region line, start position " << "(2nd column) is not an integer: " << tokens[1] << "; Skipping to the next line.\n"; continue; } // Look up the reference name. 
refID = samHeader.getReferenceID(tokens[0]); if(refID != SamReferenceInfo::NO_REF_ID) { // Reference id was found, so add it to the dbsnp dbsnpListPtr->addPosition(refID, position); } tokens.Clear(); buffer.Clear(); } } ifclose(fdbSnp); } // Read the sam records. SamRecord samRecord; int numReads = 0; ////////////////////// // Setup in case doing a quality count. // Quality histogram. const int MAX_QUAL = 126; const int START_QUAL = 33; uint64_t qualCount[MAX_QUAL+1]; for(int i = 0; i <= MAX_QUAL; i++) { qualCount[i] = 0; } const int START_PHRED = 0; const int PHRED_DIFF = START_QUAL - START_PHRED; const int MAX_PHRED = MAX_QUAL - PHRED_DIFF; uint64_t phredCount[MAX_PHRED+1]; for(int i = 0; i <= MAX_PHRED; i++) { phredCount[i] = 0; } int refPos = 0; Cigar* cigarPtr = NULL; char cigarChar = '?'; // Exclude clips from the qual/phred counts if unmapped reads are excluded. bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED; ////////////////////////////////// // When not reading by sections, getNextSection returns true // the first time, then false the next time. while(getNextSection(samIn)) { // Keep reading records from the file until SamFile::ReadRecord // indicates to stop (returns false). while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord)) { // Another record was read, so increment the number of reads. ++numReads; // See if the quality histogram should be genereated. if(qual || phred) { // Get the quality. const char* qual = samRecord.getQuality(); // Check for no quality ('*'). if((qual[0] == '*') && (qual[1] == 0)) { // This record does not have a quality string, so no // quality processing is necessary. } else { int index = 0; cigarPtr = samRecord.getCigarInfo(); cigarChar = '?'; refPos = samRecord.get0BasedPosition(); if(!qualExcludeClips && (cigarPtr != NULL)) { // Offset the reference position by any soft clips // by subtracting the queryIndex of this start position. 
// refPos is now the start position of the clips. refPos -= cigarPtr->getQueryIndex(0); } while(qual[index] != 0) { // Skip this quality if it is clipped and we are skipping clips. if(cigarPtr != NULL) { cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index); } if(qualExcludeClips && Cigar::isClip(cigarChar)) { // Skip a clipped quality. ++index; // Increment the position. continue; } if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos)) { // We have hit the end of the region, stop processing this // quality string. break; } if(withinRegion && (refPos < myStartPos)) { // This position is not in the target. ++index; // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } continue; } // Check for valid quality. if((qual[index] < START_QUAL) || (qual[index] > MAX_QUAL)) { if(qual) { std::cerr << "Invalid Quality found: " << qual[index] << ". Must be between " << START_QUAL << " and " << MAX_QUAL << ".\n"; } if(phred) { std::cerr << "Invalid Phred Quality found: " << qual[index] - PHRED_DIFF << ". Must be between " << START_QUAL << " and " << MAX_QUAL << ".\n"; } // Skip an invalid quality. ++index; // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } continue; } // Increment the count for this quality. ++(qualCount[(int)(qual[index])]); ++(phredCount[(int)(qual[index]) - PHRED_DIFF]); // Update the position if this is found in the reference or a clip. if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar)) { ++refPos; } ++index; } } } // Check the next thing to do for the read. if((baseQCPtr != NULL) || baseSum) { // Pileup the bases for this read. pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr); } } // Done with a section, move on to the next one. // New section, so flush the pileup. 
pileup.flushPileup(); } // Flush the rest of the pileup. if((baseQCPtr != NULL) || baseSum) { // Pileup the bases. pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr); PileupElementBaseQCStats::printSummary(); ifclose(baseQCPtr); } std::cerr << "Number of records read = " << samIn.GetCurrentRecordCount() << std::endl; if(basic) { std::cerr << std::endl; samIn.PrintStatistics(); } // Print the quality stats. if(qual) { std::cerr << std::endl; std::cerr << "Quality\tCount\n"; for(int i = START_QUAL; i <= MAX_QUAL; i++) { std::cerr << i << "\t" << qualCount[i] << std::endl; } } // Print the phred quality stats. if(phred) { std::cerr << std::endl; std::cerr << "Phred\tCount\n"; for(int i = START_PHRED; i <= MAX_PHRED; i++) { std::cerr << i << "\t" << phredCount[i] << std::endl; } } SamStatus::Status status = samIn.GetStatus(); if(status == SamStatus::NO_MORE_RECS) { // A status of NO_MORE_RECS means that all reads were successful. status = SamStatus::SUCCESS; } return(status); }
// Exercise the header-modification API of SamFileHeader and verify, after
// every step, that the serialized header matches the expected text.
//
// Steps covered: removing an RG tag by setting it to "", adding an HD record,
// rejecting a second HD record, removing the HD record, removing an SQ record,
// rejecting NULL HD/SQ records, and finally adding an HD record again.
//
// samHeader is expected to already hold the 23 @SQ lines, the two @RG lines
// and the two @CO lines checked by the first assertion below.
void testModHeader(SamFileHeader& samHeader)
{
    // Building blocks of the expected header text.  The expected strings
    // below are assembled from these pieces instead of repeating the whole
    // header literal for every check.
    const std::string sqBefore11 =
        "@SQ\tSN:1\tLN:247249719\n"
        "@SQ\tSN:2\tLN:242951149\n"
        "@SQ\tSN:3\tLN:199501827\n"
        "@SQ\tSN:4\tLN:191273063\n"
        "@SQ\tSN:5\tLN:180857866\n"
        "@SQ\tSN:6\tLN:170899992\n"
        "@SQ\tSN:7\tLN:158821424\n"
        "@SQ\tSN:8\tLN:146274826\n"
        "@SQ\tSN:9\tLN:140273252\n"
        "@SQ\tSN:10\tLN:135374737\n";
    const std::string sq11 =
        "@SQ\tSN:11\tLN:134452384\n";
    const std::string sqAfter11 =
        "@SQ\tSN:12\tLN:132349534\n"
        "@SQ\tSN:13\tLN:114142980\n"
        "@SQ\tSN:14\tLN:106368585\n"
        "@SQ\tSN:15\tLN:100338915\n"
        "@SQ\tSN:16\tLN:88827254\n"
        "@SQ\tSN:17\tLN:78774742\n"
        "@SQ\tSN:18\tLN:76117153\n"
        "@SQ\tSN:19\tLN:63811651\n"
        "@SQ\tSN:20\tLN:62435964\n"
        "@SQ\tSN:21\tLN:46944323\n"
        "@SQ\tSN:22\tLN:49691432\n"
        "@SQ\tSN:X\tLN:154913754\n";

    // All SQ lines, and the SQ lines once SN:11 has been removed.
    const std::string sqAll = sqBefore11 + sq11 + sqAfter11;
    const std::string sqWithout11 = sqBefore11 + sqAfter11;

    // RG lines as initially present, and after LB is removed from myID2.
    const std::string rgOriginal =
        "@RG\tID:myID\tLB:library\tSM:sample\n"
        "@RG\tID:myID2\tSM:sample2\tLB:library2\n";
    const std::string rgWithoutLB2 =
        "@RG\tID:myID\tLB:library\tSM:sample\n"
        "@RG\tID:myID2\tSM:sample2\n";

    const std::string hd13 = "@HD\tVN:1.3\n";
    const std::string hd14 = "@HD\tVN:1.4\n";
    const std::string comments = "@CO\tComment 1\n@CO\tComment 2\n";

    // Check the header line.
    std::string headerString = "";
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqAll + rgOriginal + comments);

    // Remove a tag - by setting it to "".
    assert(samHeader.setRGTag("LB", "", "myID2") == true);

    // Check the header line.
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqAll + rgWithoutLB2 + comments);

    // Add an HD tag.
    SamHeaderHD* hd = new SamHeaderHD();
    assert(hd->setTag("VN", "1.3") == true);
    assert(samHeader.addHD(hd) == true);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.3") == 0);
    assert(samHeader.getHeaderString(headerString) == true);
    // The HD line is serialized after the RG lines and before the comments.
    assert(headerString == sqAll + rgWithoutLB2 + hd13 + comments);

    // Try adding another HD tag.  It must be rejected and the existing
    // VN:1.3 record must be left untouched.  hd2 is reused at the end of
    // this test, so the test relies on it still being valid after the
    // rejected add.
    SamHeaderHD* hd2 = new SamHeaderHD();
    assert(hd2->setTag("VN", "1.4") == true);
    assert(samHeader.addHD(hd2) == false);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.4") != 0);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.3") == 0);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqAll + rgWithoutLB2 + hd13 + comments);

    // Remove the entire HD Tag.
    assert(samHeader.removeHD() == true);
    assert(strcmp(samHeader.getHDTagValue("VN"), "") == 0);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqAll + rgWithoutLB2 + comments);

    // Remove an entire SQ Tag.
    assert(strcmp(samHeader.getSQTagValue("LN", "11"), "134452384") == 0);
    assert(samHeader.removeSQ("11") == true);
    assert(strcmp(samHeader.getSQTagValue("LN", "11"), "") == 0);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqWithout11 + rgWithoutLB2 + comments);

    // Try adding a null HD tag.  The header must not change.
    hd = NULL;
    assert(samHeader.addHD(hd) == false);
    assert(strcmp(samHeader.getHDTagValue("VN"), "") == 0);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.4") != 0);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.3") != 0);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqWithout11 + rgWithoutLB2 + comments);

    // Try adding a null SQ tag.  The header must not change.
    SamHeaderSQ* sq = NULL;
    assert(samHeader.addSQ(sq) == false);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqWithout11 + rgWithoutLB2 + comments);

    // Try adding an HD tag again.  Now that no HD record is present, the
    // previously rejected hd2 (VN:1.4) is accepted.
    assert(samHeader.addHD(hd2) == true);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.4") == 0);
    assert(strcmp(samHeader.getHDTagValue("VN"), "1.3") != 0);
    assert(samHeader.getHeaderString(headerString) == true);
    assert(headerString == sqWithout11 + rgWithoutLB2 + hd14 + comments);

    // TODO Get the comments.
}
// add readgroup header line to the SamFileHeader void addReadGroupToHeader(SamFileHeader& header, ReadGroup& rg) { if ( !header.addHeaderLine(rg.s_header_line.c_str()) ) { Logger::gLogger->error("Failed to add ID = %s, header line %s",rg.s_id.c_str(),rg.s_header_line.c_str()); } }
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader) { noRgPgString.clear(); // strings for comparing if two RGs with same ID are the same. static std::string prevString = ""; static std::string newString = ""; SamHeaderRecord* rec = header.getNextHeaderRecord(); while(rec != NULL) { if(rec->getType() == SamHeaderRecord::RG) { if(newHeader != NULL) { // This is an RG line. // First check if this RG is already included in the new header. SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID")); if(prevRG != NULL) { // This RG already exists, check that they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevRG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup to header, " "duplicate, but non-identical RG ID, %s", rec->getTagValue("ID")); } } else { // This RG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec))) { // Failed to add the RG, exit. Logger::gLogger->error("Failed to add readgroup to header, %s", newHeader->getErrorMessage()); } } } } else if(rec->getType() == SamHeaderRecord::PG) { if(newHeader != NULL) { // This is a PG line. // First check if this PG is already included in the new header. SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID")); if(prevPG != NULL) { // This PG already exists, check if they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevPG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, ignore for now. // TODO: change the ID, and add it. 
Logger::gLogger->warning("Warning: dropping duplicate, " "but non-identical PG ID, %s", rec->getTagValue("ID")); } } else { // This PG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec))) { // Failed to add the PG, exit. Logger::gLogger->error("Failed to add PG to header, %s", newHeader->getErrorMessage()); } } } } else { rec->appendString(noRgPgString); } rec = header.getNextHeaderRecord(); } // Append the comments. header.appendCommentLines(noRgPgString); }
// main function int MergeBam::execute(int argc, char ** argv) { static struct option getopt_long_options[] = { // Input options { "list", required_argument, NULL, 'l'}, { "in", required_argument, NULL, 'i'}, { "out", required_argument, NULL, 'o'}, { "verbose", no_argument, NULL, 'v'}, { "log", required_argument, NULL, 'L'}, { NULL, 0, NULL, 0 }, }; // Adjust the arguments since it is called as ./bam mergeBam instead of // just mergeBam. --argc; ++argv; int n_option_index = 0; char c; bool b_verbose = false; vector<std::string> vs_in_bam_files; // input BAM files std::string s_list, s_out, s_logger; while ( ( c = getopt_long(argc, argv, "l:i:o:vL:", getopt_long_options, &n_option_index) ) != -1 ) { switch(c) { case 'i': vs_in_bam_files.push_back(optarg); break; case 'l': s_list = optarg; break; case 'o': s_out = optarg; break; case 'v': b_verbose = true; break; case 'L': s_logger = optarg; break; default: fprintf(stderr,"Unrecognized option %s",getopt_long_options[n_option_index].name); abort(); } } if ( s_logger.empty() ) { if(s_out.empty()) { s_logger = "-"; } else { s_logger = s_out + ".log"; } } // create a logger object, now possible to write logs/warnings/errors Logger::gLogger = new Logger(s_logger.c_str(), b_verbose); // every argument must correspond to an option if ( optind < argc ) { usage(); Logger::gLogger->error("non-option argument exist"); } // check the required arguments are nonempty if ( (vs_in_bam_files.empty() && s_list.empty()) || s_out.empty() ) { usage(); Logger::gLogger->error("At least one of the required argument is missing"); } if(!vs_in_bam_files.empty() && !s_list.empty()) { Logger::gLogger->error("Cannot specify both --in/-i and --list/-l"); } if(!s_list.empty()) { Logger::gLogger->writeLog("Input list file : %s",s_list.c_str()); } else { std::string bamList = ""; for(unsigned int i = 0; i < vs_in_bam_files.size(); i++) { if(i != 0) { bamList += ", "; } bamList += vs_in_bam_files[i]; } Logger::gLogger->writeLog("Input list file : %s", 
bamList.c_str()); } Logger::gLogger->writeLog("Output BAM file : %s",s_out.c_str()); Logger::gLogger->writeLog("Output log file : %s",s_logger.c_str()); Logger::gLogger->writeLog("Verbose mode : %s",b_verbose ? "On" : "Off"); vector<ReadGroup> v_readgroups; // readGroups corresponding to BAM file vector<ReadGroup> v_uniq_readgroups; // unique readGroups written to header // If the list file is being used instead of the individual bams, parse it. if(!s_list.empty()) { // parse the list file and fill the vectors above if ( parseListFile(s_list, vs_in_bam_files, v_readgroups, v_uniq_readgroups) == false ) { Logger::gLogger->error("Error in parsing the list file %s",s_list.c_str()); } if ( vs_in_bam_files.size() != v_readgroups.size() ) { Logger::gLogger->error("parseListFile gave different size for vs_in_bam_files, v_readgroups: %d, %d", vs_in_bam_files.size(), v_readgroups.size()); } } // sanity check uint32_t n_bams = vs_in_bam_files.size(); Logger::gLogger->writeLog("Total of %d BAM files are being merged",n_bams); if ( n_bams < 2 ) { Logger::gLogger->error("At least two BAM files must be specified for merging"); } // create SamFile and SamFileHeader object for each BAM file SamFile *p_in_bams = new SamFile[n_bams]; SamFileHeader *p_headers = new SamFileHeader[n_bams]; // read each BAM file and its header, // making sure that the headers are identical std::string firstHeaderNoRGPG = ""; std::string headerNoRGPG = ""; SamFileHeader newHeader; std::string firstHeaderString = ""; for(uint32_t i=0; i < n_bams; ++i) { if ( ! p_in_bams[i].OpenForRead(vs_in_bam_files[i].c_str()) ) { Logger::gLogger->error("Cannot open BAM file %s for reading",vs_in_bam_files[i].c_str()); } p_in_bams[i].setSortedValidation(SamFile::COORDINATE); p_in_bams[i].ReadHeader(p_headers[i]); // Extract the RGs from this header. if(i == 0) { // First header, so store it as the first header newHeader = p_headers[i]; // Determine the header without RG. 
parseOutRG(p_headers[i], firstHeaderNoRGPG, NULL); } else { parseOutRG(p_headers[i], headerNoRGPG, &newHeader); if(firstHeaderNoRGPG != headerNoRGPG) { Logger::gLogger->error("The headers are not identical at index %d",i); } if(newHeader.getReferenceInfo() != p_headers[i].getReferenceInfo()) { Logger::gLogger->error("The headers are not identical at index %d",i); } } } // first header will be the new header to be written to output // adding all possible readGroups to the new header for(uint32_t i=0; i < v_uniq_readgroups.size(); ++i) { addReadGroupToHeader(newHeader, v_uniq_readgroups[i]); } // Write an output file with new headers SamFile bam_out; if ( !bam_out.OpenForWrite(s_out.c_str()) ) { Logger::gLogger->error("Cannot open BAM file %s for writing",s_out.c_str()); } bam_out.setSortedValidation(SamFile::COORDINATE); bam_out.WriteHeader(newHeader); // create SamRecords and GenomicCoordinates for each input BAM file SamRecord* p_records = new SamRecord[n_bams]; uint64_t* p_gcoordinates = new uint64_t[n_bams]; // read the first record for every input BAM file for(uint32_t i=0; i < n_bams; ++i) { if ( p_in_bams[i].ReadRecord(p_headers[i],p_records[i]) ) { if ( p_records[i].isValid(p_headers[i]) ) { p_gcoordinates[i] = getGenomicCoordinate(p_records[i]); } else { Logger::gLogger->error("Invalid record found at the first line of file %u. Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure())); } } else { if ( p_in_bams[i].GetFailure() == SamStatus::NO_MORE_RECS ) { // the BAM file has no record p_gcoordinates[i] = MAX_GENOMIC_COORDINATE; } else { Logger::gLogger->error("Invalid record found at the first line of file %u. 
Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure())); } } } // Routine for writing output BAM file uint32_t nWrittenRecords = 0; // number of written BAM records while(true) { // scan the minimum index of genomic coordinate int min_idx = -1; uint64_t min_gcoordinate = MAX_GENOMIC_COORDINATE; for(uint32_t i=0; i < n_bams; ++i) { if ( min_gcoordinate > p_gcoordinates[i] ) { min_gcoordinate = p_gcoordinates[i]; min_idx = static_cast<int>(i); } } // If every file reached EOF, exit the loop if ( min_idx < 0 ) break; // If adding read groups, add the tag. if(!v_readgroups.empty()) { // add readGroup tag to the record to write and write to output BAM file //Logger::gLogger->writeLog("%d",min_idx); addReadGroupTag(p_records[min_idx], v_readgroups[min_idx]); } bam_out.WriteRecord(newHeader, p_records[min_idx]); ++nWrittenRecords; if ( nWrittenRecords % 1000000 == 0 ) { Logger::gLogger->writeLog("Writing %u records to the output file",nWrittenRecords); } // Read a record from the input BAM file if ( p_in_bams[min_idx].ReadRecord(p_headers[min_idx], p_records[min_idx]) ) { if ( p_records[min_idx].isValid(p_headers[min_idx]) ) { p_gcoordinates[min_idx] = getGenomicCoordinate(p_records[min_idx]); } else { // if invalid record found Logger::gLogger->error("Invalid record found at recordCount %d of file %d. Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure())); } } else { if ( p_in_bams[min_idx].GetFailure() == SamStatus::NO_MORE_RECS ) { p_gcoordinates[min_idx] = MAX_GENOMIC_COORDINATE; // Mark that all record has been read } else { Logger::gLogger->error("Cannot read record at recordCount %d of file %d. 
Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure())); } } } // close files and free allocated memory Logger::gLogger->writeLog("Finished writing %d records into the output BAM file",bam_out.GetCurrentRecordCount()); bam_out.Close(); for(uint32_t i=0; i < n_bams; ++i) { p_in_bams[i].Close(); } delete[] p_records; delete[] p_in_bams; delete[] p_headers; delete[] p_gcoordinates; delete Logger::gLogger; return 0; }
int main(int argc, char ** argv) { gpLogger = new Logger; static struct option getopt_long_options[] = { // Input options { "fasta", required_argument, NULL, 'f'}, { "in", required_argument, NULL, 'i'}, { "out", required_argument, NULL, 'o'}, { "verbose", no_argument, NULL, 'v'}, { "log", required_argument, NULL, 'l'}, { "clear", no_argument, NULL, 0}, { "AS", required_argument, NULL, 0}, { "UR", required_argument, NULL, 0}, { "SP", required_argument, NULL, 0}, { "HD", required_argument, NULL, 0}, { "RG", required_argument, NULL, 0}, { "PG", required_argument, NULL, 0}, { "checkSQ", no_argument, NULL, 0}, { NULL, 0, NULL, 0 }, }; int n_option_index = 0, c; std::string sAS, sUR, sSP, sFasta, sInFile, sOutFile, sLogFile; bool bClear, bCheckSQ, bVerbose; std::vector<std::string> vsHDHeaders, vsRGHeaders, vsPGHeaders; bCheckSQ = bVerbose = false; bClear = true; while ( (c = getopt_long(argc, argv, "vf:i:o:l:", getopt_long_options, &n_option_index)) != -1 ) { // std::cout << getopt_long_options[n_option_index].name << "\t" << optarg << std::endl; if ( c == 'f' ) { sFasta = optarg; } else if ( c == 'i' ) { sInFile = optarg; } else if ( c == 'o' ) { sOutFile = optarg; } else if ( c == 'v' ) { bVerbose = true; } else if ( c == 'l' ) { sLogFile = optarg; } else if ( strcmp(getopt_long_options[n_option_index].name,"AS") == 0 ) { sAS = optarg; } else if ( strcmp(getopt_long_options[n_option_index].name,"UR") == 0 ) { sUR = optarg; } else if ( strcmp(getopt_long_options[n_option_index].name,"SP") == 0 ) { sSP = optarg; } else if ( strcmp(getopt_long_options[n_option_index].name,"HD") == 0 ) { vsHDHeaders.push_back(optarg); } else if ( strcmp(getopt_long_options[n_option_index].name,"RG") == 0 ) { vsRGHeaders.push_back(optarg); } else if ( strcmp(getopt_long_options[n_option_index].name,"PG") == 0 ) { vsPGHeaders.push_back(optarg); } else if ( strcmp(getopt_long_options[n_option_index].name,"checkSQ") == 0 ) { bCheckSQ = true; } else { std::cerr << "Error: Unrecognized option " 
<< getopt_long_options[n_option_index].name << std::endl; abort(); } } if ( optind < argc ) { printUsage(std::cerr); gpLogger->error("non-option argument %s exist ",argv[optind]); } if ( sInFile.empty() || sOutFile.empty() ) { printUsage(std::cerr); gpLogger->error("Input and output files are required"); } if ( sLogFile.compare("__NONE__") == 0 ) { sLogFile = (sOutFile + ".log"); } gpLogger->open(sLogFile.c_str(), bVerbose); if ( ( bCheckSQ ) && ( sFasta.empty() ) ) { printUsage(std::cerr); gpLogger->error("--checkSQ option must be used with --fasta option"); } // check whether each header line starts with a correct tag checkHeaderStarts(vsHDHeaders, "@HD\t"); checkHeaderStarts(vsRGHeaders, "@RG\t"); checkHeaderStarts(vsPGHeaders, "@PG\t"); gpLogger->write_log("Arguments in effect:"); gpLogger->write_log("\t--in [%s]",sInFile.c_str()); gpLogger->write_log("\t--out [%s]",sOutFile.c_str()); gpLogger->write_log("\t--log [%s]",sLogFile.c_str()); gpLogger->write_log("\t--fasta [%s]",sFasta.c_str()); gpLogger->write_log("\t--AS [%s]",sAS.c_str()); gpLogger->write_log("\t--UR [%s]",sUR.c_str()); gpLogger->write_log("\t--SP [%s]",sSP.c_str()); gpLogger->write_log("\t--checkSQ [%s]",bClear ? "ON" : "OFF" ); if ( vsHDHeaders.empty() ) { gpLogger->write_log("\t--HD []"); } else { gpLogger->write_log("\t--HD [%s]",vsHDHeaders[0].c_str()); } if ( vsRGHeaders.empty() ) { gpLogger->write_log("\t--RG []"); } else { gpLogger->write_log("\t--RG [%s]",vsRGHeaders[0].c_str()); } if ( vsPGHeaders.empty() ) { gpLogger->write_log("\t--PG []"); } else { for(uint32_t i=0; i < vsPGHeaders.size(); ++i) { gpLogger->write_log("\t--PG [%s]",vsPGHeaders[i].c_str()); } } if ( (vsHDHeaders.empty() ) && ( vsRGHeaders.empty() ) && ( vsPGHeaders.empty() ) && ( !bClear ) && ( sFasta.empty() ) ) { gpLogger->warning("No option is in effect for modifying BAM files. 
The input and output files will be identical"); } if ( ( vsHDHeaders.size() > 1 ) || ( vsRGHeaders.size() > 1 ) ) { gpLogger->error("HD and RG headers cannot be multiple"); } FastaFile fastaFile; if ( ! sFasta.empty() ) { if ( fastaFile.open(sFasta.c_str()) ) { gpLogger->write_log("Reading the reference file %s",sFasta.c_str()); fastaFile.readThru(); fastaFile.close(); gpLogger->write_log("Finished reading the reference file %s",sFasta.c_str()); } else { gpLogger->error("Failed to open reference file %s",sFasta.c_str()); } } SamFile samIn; SamFile samOut; if ( ! samIn.OpenForRead(sInFile.c_str()) ) { gpLogger->error("Cannot open BAM file %s for reading - %s",sInFile.c_str(), SamStatus::getStatusString(samIn.GetStatus()) ); } if ( ! samOut.OpenForWrite(sOutFile.c_str()) ) { gpLogger->error("Cannot open BAM file %s for writing - %s",sOutFile.c_str(), SamStatus::getStatusString(samOut.GetStatus()) ); } SamFileHeader samHeader; SamHeaderRecord* pSamHeaderRecord; samIn.ReadHeader(samHeader); // check the sanity of SQ file // make sure the SN and LN matches, with the same order if ( bCheckSQ ) { unsigned int numSQ = 0; while( (pSamHeaderRecord = samHeader.getNextHeaderRecord()) != NULL ) { if ( pSamHeaderRecord->getType() == SamHeaderRecord::SQ ) { ++numSQ; } } if ( numSQ != fastaFile.vsSequenceNames.size() ) { gpLogger->error("# of @SQ tags are different from the original BAM and the reference file"); } // iterator over all @SQ objects for(unsigned int i=0; i < numSQ; ++i) { pSamHeaderRecord = samHeader.getSQ(fastaFile.vsSequenceNames[i].c_str()); if ( fastaFile.vsSequenceNames[i].compare(pSamHeaderRecord->getTagValue("SN")) != 0 ) { gpLogger->error("SequenceName is not identical between fasta and input BAM file"); } else if ( static_cast<int>(fastaFile.vnSequenceLengths[i]) != atoi(pSamHeaderRecord->getTagValue("LN")) ) { gpLogger->error("SequenceLength is not identical between fasta and input BAM file"); } else { if ( !sAS.empty() ) 
samHeader.setSQTag("AS",sAS.c_str(),fastaFile.vsSequenceNames[i].c_str()); samHeader.setSQTag("M5",fastaFile.vsMD5sums[i].c_str(),fastaFile.vsSequenceNames[i].c_str()); if ( !sUR.empty() ) samHeader.setSQTag("UR",sUR.c_str(),fastaFile.vsSequenceNames[i].c_str()); if ( !sSP.empty() ) samHeader.setSQTag("SP",sSP.c_str(),fastaFile.vsSequenceNames[i].c_str()); } } gpLogger->write_log("Finished checking the consistency of SQ tags"); } else { gpLogger->write_log("Skipped checking the consistency of SQ tags"); } // go over the headers again, // assuming order of HD, SQ, RG, PG, and put proper tags at the end of the original tags gpLogger->write_log("Creating the header of new output file"); //SamFileHeader outHeader; samHeader.resetHeaderRecordIter(); for(unsigned int i=0; i < vsHDHeaders.size(); ++i) { samHeader.addHeaderLine(vsHDHeaders[i].c_str()); } /* for(int i=0; i < fastaFile.vsSequenceNames.size(); ++i) { std::string s("@SQ\tSN:"); char buf[1024]; s += fastaFile.vsSequenceNames[i]; sprintf(buf,"\tLN:%d",fastaFile.vnSequenceLengths[i]); s += buf; if ( !sAS.empty() ) { sprintf(buf,"\tAS:%s",sAS.c_str()); s += buf; } if ( !sUR.empty() ) { sprintf(buf,"\tUR:%s",sUR.c_str()); s += buf; } sprintf(buf,"\tM5:%s",fastaFile.vsMD5sums[i].c_str()); s += buf; if ( !sSP.empty() ) { sprintf(buf,"\tSP:%s",sSP.c_str()); s += buf; } outHeader.addHeaderLine(s.c_str()); }*/ for(unsigned int i=0; i < vsRGHeaders.size(); ++i) { samHeader.addHeaderLine(vsRGHeaders[i].c_str()); } for(unsigned int i=0; i < vsPGHeaders.size(); ++i) { samHeader.addHeaderLine(vsPGHeaders[i].c_str()); } samOut.WriteHeader(samHeader); gpLogger->write_log("Adding %d HD, %d RG, and %d PG headers",vsHDHeaders.size(), vsRGHeaders.size(), vsPGHeaders.size()); gpLogger->write_log("Finished writing output headers"); // parse RG tag and get RG ID to append std::string sRGID; if ( ! 
vsRGHeaders.empty() ) { std::vector<std::string> tokens; FastaFile::tokenizeString( vsRGHeaders[0].c_str(), tokens ); for(unsigned int i=0; i < tokens.size(); ++i) { if ( tokens[i].find("ID:") == 0 ) { sRGID = tokens[i].substr(3); } } } gpLogger->write_log("Writing output BAM file"); SamRecord samRecord; while (samIn.ReadRecord(samHeader, samRecord) == true) { if ( !sRGID.empty() ) { if ( samRecord.addTag("RG",'Z',sRGID.c_str()) == false ) { gpLogger->error("Failed to add a RG tag %s",sRGID.c_str()); } // temporary code added if ( strncmp(samRecord.getReadName(),"seqcore_",8) == 0 ) { char buf[1024]; sprintf(buf,"UM%s",samRecord.getReadName()+8); samRecord.setReadName(buf); } } samOut.WriteRecord(samHeader, samRecord); //if ( samIn.GetCurrentRecordCount() == 1000 ) break; } samOut.Close(); gpLogger->write_log("Successfully written %d records",samIn.GetCurrentRecordCount()); delete gpLogger; return 0; }
bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header, SamStatus& status) { if((filePtr == NULL) || (filePtr->isOpen() == false)) { // File is not open, return false. status.setStatus(SamStatus::FAIL_ORDER, "Cannot write header since the file pointer is null"); return(false); } char magic[4]; magic[0] = 'B'; magic[1] = 'A'; magic[2] = 'M'; magic[3] = 1; // Write magic to the file. ifwrite(filePtr, magic, 4); //////////////////////////////// // Write the header to the file. //////////////////////////////// // Construct a string containing the entire header. std::string headerString = ""; header.getHeaderString(headerString); int32_t headerLen = headerString.length(); int numWrite = 0; // Write the header length. numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t)); if(numWrite != sizeof(int32_t)) { status.setStatus(SamStatus::FAIL_IO, "Failed to write the BAM header length."); return(false); } // Write the header to the file. numWrite = ifwrite(filePtr, headerString.c_str(), headerLen); if(numWrite != headerLen) { status.setStatus(SamStatus::FAIL_IO, "Failed to write the BAM header."); return(false); } //////////////////////////////////////////////////////// // Write the Reference Information. const SamReferenceInfo& refInfo = header.getReferenceInfo(); // Get the number of sequences. int32_t numSeq = refInfo.getNumEntries(); ifwrite(filePtr, &numSeq, sizeof(int32_t)); // Write each reference sequence for (int i = 0; i < numSeq; i++) { const char* refName = refInfo.getReferenceName(i); // Add one for the null value. int32_t nameLength = strlen(refName) + 1; // Write the length of the reference name. ifwrite(filePtr, &nameLength, sizeof(int32_t)); // Write the name. ifwrite(filePtr, refName, nameLength); // Write the length of the reference sequence. int32_t refLen = refInfo.getReferenceLength(i); ifwrite(filePtr, &refLen, sizeof(int32_t)); } return(true); }
// Dump the reference information from specified SAM/BAM file. int DumpRefInfo::execute(int argc, char **argv) { // Extract command line arguments. String inFile = ""; bool noeof = false; bool printRecordRefs = false; bool params = false; ParameterList inputParameters; BEGIN_LONG_PARAMETERS(longParameterList) LONG_STRINGPARAMETER("in", &inFile) LONG_PARAMETER("noeof", &noeof) LONG_PARAMETER("printRecordRefs", &printRecordRefs) LONG_PARAMETER("params", ¶ms) LONG_PHONEHOME(VERSION) END_LONG_PARAMETERS(); inputParameters.Add(new LongParameters ("Input Parameters", longParameterList)); // parameters start at index 2 rather than 1. inputParameters.Read(argc, argv, 2); // If no eof block is required for a bgzf file, set the bgzf file type to // not look for it. if(noeof) { // Set that the eof block is not required. BgzfFileType::setRequireEofBlock(false); } // Check to see if the in file was specified, if not, report an error. if(inFile == "") { usage(); inputParameters.Status(); // In file was not specified but it is mandatory. std::cerr << "--in is a mandatory argument, " << "but was not specified" << std::endl; return(-1); } if(params) { inputParameters.Status(); } // Open the input file for reading. SamFile samIn; samIn.OpenForRead(inFile); // Read the sam header. SamFileHeader samHeader; samIn.ReadHeader(samHeader); const SamReferenceInfo& refInfo = samHeader.getReferenceInfo(); int numReferences = refInfo.getNumEntries(); for(int i = 0; i < numReferences; i++) { std::cout << "Reference Index " << i; std::cout << "; Name: " << refInfo.getReferenceName(i) << std::endl; } if(numReferences == 0) { // There is no reference info. std::cerr << "The header contains no reference information.\n"; } // If we are to print the references as found in the records, loop // through reading the records. if(printRecordRefs) { SamRecord samRecord; // Track the prev name/id. std::string prevName = ""; int prevID = -2; int recCount = 0; // track the num records in a ref. 
// Keep reading records until ReadRecord returns false. while(samIn.ReadRecord(samHeader, samRecord)) { const char* name = samRecord.getReferenceName(); int id = samRecord.getReferenceID(); if((strcmp(name, prevName.c_str()) != 0) || (id != prevID)) { if(prevID != -2) { std::cout << "\tRef ID: " << prevID << "\tRef Name: " << prevName << "\tNumRecs: " << recCount << std::endl; } recCount = 0; prevID = id; prevName = name; } ++recCount; } // Print the last index. if(prevID != -2) { std::cout << "\tRef ID: " << prevID << "\tRef Name: " << prevName << "\tNumRecs: " << recCount << std::endl; } } return(SamStatus::SUCCESS); }