void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader) { noRgPgString.clear(); // strings for comparing if two RGs with same ID are the same. static std::string prevString = ""; static std::string newString = ""; SamHeaderRecord* rec = header.getNextHeaderRecord(); while(rec != NULL) { if(rec->getType() == SamHeaderRecord::RG) { if(newHeader != NULL) { // This is an RG line. // First check if this RG is already included in the new header. SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID")); if(prevRG != NULL) { // This RG already exists, check that they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevRG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup to header, " "duplicate, but non-identical RG ID, %s", rec->getTagValue("ID")); } } else { // This RG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec))) { // Failed to add the RG, exit. Logger::gLogger->error("Failed to add readgroup to header, %s", newHeader->getErrorMessage()); } } } } else if(rec->getType() == SamHeaderRecord::PG) { if(newHeader != NULL) { // This is a PG line. // First check if this PG is already included in the new header. SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID")); if(prevPG != NULL) { // This PG already exists, check if they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevPG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, ignore for now. // TODO: change the ID, and add it. Logger::gLogger->warning("Warning: dropping duplicate, " "but non-identical PG ID, %s", rec->getTagValue("ID")); } } else { // This PG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec))) { // Failed to add the PG, exit. Logger::gLogger->error("Failed to add PG to header, %s", newHeader->getErrorMessage()); } } } } else { rec->appendString(noRgPgString); } rec = header.getNextHeaderRecord(); } // Append the comments. header.appendCommentLines(noRgPgString); }
void Bam2FastQ::writeFastQ(SamRecord& samRec, IFILE filePtr, const std::string& fileNameExt, const char* readNameExt) { static int16_t flag; static std::string sequence; static String quality; static std::string rg; static std::string rgFastqExt; static std::string rgListStr; static std::string fileName; static std::string fq2; if(mySplitRG) { rg = samRec.getString("RG").c_str(); rgFastqExt = rg + fileNameExt; OutFastqMap::iterator it; it = myOutFastqs.find(rgFastqExt); if(it == myOutFastqs.end()) { // New file. fileName = myOutBase.c_str(); if(rg != "") { fileName += '.'; } else { rg = "."; } fileName += rgFastqExt; filePtr = ifopen(fileName.c_str(), "w", myCompression); myOutFastqs[rgFastqExt] = filePtr; if(fileNameExt != mySecondFileNameExt) { // first end. const char* sm = mySamHeader.getRGTagValue("SM", rg.c_str()); if(strcmp(sm, "") == 0){sm = myOutBase.c_str();} rgListStr.clear(); SamHeaderRG* rgPtr = mySamHeader.getRG(rg.c_str()); if((rgPtr == NULL) || (!rgPtr->appendString(rgListStr))) { // No RG info for this record. rgListStr = ".\n"; } fq2 = "."; if(fileNameExt == myFirstFileNameExt) { fq2 = myOutBase.c_str(); if(rg != ".") { fq2 += '.'; fq2 += rg; } fq2 += mySecondFileNameExt; } ifprintf(myFqList, "%s\t%s\t%s\t%s", sm, fileName.c_str(), fq2.c_str(), rgListStr.c_str()); } } else { filePtr = it->second; } } if(filePtr == NULL) { throw(std::runtime_error("Programming ERROR/EXITING: Bam2FastQ filePtr not set.")); return; } flag = samRec.getFlag(); const char* readName = samRec.getReadName(); sequence = samRec.getSequence(); if(myQField.IsEmpty()) { // Read the quality from the quality field quality = samRec.getQuality(); } else { // Read Quality from the specified tag const String* qTagPtr = samRec.getStringTag(myQField.c_str()); if((qTagPtr != NULL) && (qTagPtr->Length() == (int)sequence.length())) { // Use the tag value for quality quality = qTagPtr->c_str(); } else { // Tag was not found, so use the quality field. ++myNumQualTagErrors; if(myNumQualTagErrors == 1) { std::cerr << "Bam2FastQ: " << myQField.c_str() << " tag was not found/invalid, so using the quality field in records without the tag\n"; } quality = samRec.getQuality(); } } if(SamFlag::isReverse(flag) && myReverseComp) { // It is reverse, so reverse compliment the sequence BaseUtilities::reverseComplement(sequence); // Reverse the quality. quality.Reverse(); } else { // Ensure it is all capitalized. int seqLen = sequence.size(); for (int i = 0; i < seqLen; i++) { sequence[i] = (char)toupper(sequence[i]); } } if(myRNPlus) { ifprintf(filePtr, "@%s%s\n%s\n+%s%s\n%s\n", readName, readNameExt, sequence.c_str(), readName, readNameExt, quality.c_str()); } else { ifprintf(filePtr, "@%s%s\n%s\n+\n%s\n", readName, readNameExt, sequence.c_str(), quality.c_str()); } // Release the record. myPool.releaseRecord(&samRec); }
void parseOutRG(SamFileHeader& header, std::string& noRgPgString, SamFileHeader* newHeader, bool ignorePI) { noRgPgString.clear(); // strings for comparing if two RGs with same ID are the same. static std::string prevString = ""; static std::string newString = ""; SamHeaderRecord* rec = header.getNextHeaderRecord(); while(rec != NULL) { if(rec->getType() == SamHeaderRecord::RG) { if(newHeader != NULL) { // This is an RG line. // First check if this RG is already included in the new header. SamHeaderRG* prevRG = newHeader->getRG(rec->getTagValue("ID")); if(prevRG != NULL) { // This RG already exists, check that they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevRG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { if(!ignorePI) { Logger::gLogger->error("Failed to add readgroup to " "header, duplicate, but " "non-identical RG ID, %s\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { // Check for a PI string. size_t prevPIStart = prevString.find("PI:"); size_t newPIStart = newString.find("PI:"); // If they are both npos, then PI was not found // so fail. if((prevPIStart == std::string::npos) && (newPIStart == std::string::npos)) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup" " to header, duplicate," " but non-identical RG" " ID, %s\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { // PI found in one or both strings. size_t prevPIEnd; size_t newPIEnd; if(prevPIStart == std::string::npos) { // new string has PI, so compare to the start of that. prevPIStart = newPIStart; prevPIEnd = newPIStart; } else { prevPIEnd = prevString.find('\t', prevPIStart) + 1; } if(newPIStart == std::string::npos) { // new string has PI, so compare to the start of that. newPIStart = prevPIStart; newPIEnd = newPIStart; } else { newPIEnd = newString.find('\t', newPIStart) + 1; } // Compare before PI. if((newString.compare(0, newPIStart, prevString, 0, prevPIStart) != 0) || (newString.compare(newPIEnd, std::string::npos, prevString, prevPIEnd, std::string::npos) != 0)) { // They are not identical, so report an error. Logger::gLogger->error("Failed to add readgroup to header, " "duplicate, but non-identical RG ID, %s, " "even when ignoring PI\n" "prev:\t(%s)\nnew:\t(%s)", rec->getTagValue("ID"), prevString.c_str(), newString.c_str()); } else { Logger::gLogger->warning("Warning: ignoring non-identical PI field " "for RG ID, %s", rec->getTagValue("ID")); } } } } } else { // This RG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderRG&)(*rec))) { // Failed to add the RG, exit. Logger::gLogger->error("Failed to add readgroup to header, %s", newHeader->getErrorMessage()); } } } } else if(rec->getType() == SamHeaderRecord::PG) { if(newHeader != NULL) { // This is a PG line. // First check if this PG is already included in the new header. SamHeaderPG* prevPG = newHeader->getPG(rec->getTagValue("ID")); if(prevPG != NULL) { // This PG already exists, check if they are the same. // If they are the same, there is nothing to do. bool status = true; prevString.clear(); newString.clear(); status &= prevPG->appendString(prevString); status &= rec->appendString(newString); if(prevString != newString) { // They are not identical, ignore for now. // TODO: change the ID, and add it. Logger::gLogger->warning("Warning: dropping duplicate, " "but non-identical PG ID, %s", rec->getTagValue("ID")); } } else { // This PG does not exist yet, so add it to the new header. if(!newHeader->addRecordCopy((SamHeaderPG&)(*rec))) { // Failed to add the PG, exit. Logger::gLogger->error("Failed to add PG to header, %s", newHeader->getErrorMessage()); } } } } else { rec->appendString(noRgPgString); } rec = header.getNextHeaderRecord(); } // Append the comments. header.appendCommentLines(noRgPgString); }