// // Main // int filterBAMMain(int argc, char** argv) { parseFilterBAMOptions(argc, argv); // Read the graph if distance-filtering mode is enabled StringGraph* pGraph = NULL; if(!opt::asqgFile.empty()) pGraph = SGUtil::loadASQG(opt::asqgFile, 0, false); // Read the BWTs if depth-filtering mode is enabled BWT* pBWT = NULL; BWT* pRBWT = NULL; if(!opt::fmIndexPrefix.empty()) { pBWT = new BWT(opt::fmIndexPrefix + BWT_EXT, opt::sampleRate); pRBWT = new BWT(opt::fmIndexPrefix + RBWT_EXT, opt::sampleRate); } Timer* pTimer = new Timer(PROGRAM_IDENT); // int numPairsTotal = 0; int numPairsFilteredByDistance = 0; int numPairsFilteredByER = 0; int numPairsFilteredByQuality = 0; int numPairsFilteredByDepth = 0; int numPairsUnmapped = 0; int numPairsWrote = 0; // Open the bam files for reading/writing BamTools::BamReader* pBamReader = new BamTools::BamReader; pBamReader->Open(opt::bamFile); BamTools::BamWriter* pBamWriter = new BamTools::BamWriter; pBamWriter->Open(opt::outFile, pBamReader->GetHeaderText(), pBamReader->GetReferenceData()); const BamTools::RefVector& referenceVector = pBamReader->GetReferenceData(); BamTools::BamAlignment record1; BamTools::BamAlignment record2; bool done = false; while(!done) { if(numPairsTotal++ % 200000 == 0) printf("[sga filterBAM] Processed %d pairs\n", numPairsTotal); done = !readAlignmentPair(pBamReader, record1, record2); if(done) break; if(!record1.IsMapped() || !record2.IsMapped()) { numPairsUnmapped += 1; continue; } // Ensure the pairing is correct if(record1.Name != record2.Name) { std::cout << "NAME FAIL: " << record1.Name << " " << record2.Name << "\n"; } assert(record1.Name == record2.Name); bool bPassedFilters = true; // Check if the error rate is below the max double er1 = getErrorRate(record1); double er2 = getErrorRate(record2); if(er1 > opt::maxError || er2 > opt::maxError) { bPassedFilters = false; numPairsFilteredByER += 1; } if(record1.MapQuality < opt::minQuality || record2.MapQuality < opt::minQuality) { bPassedFilters = false; numPairsFilteredByQuality += 1; } // Perform depth check for pairs aligning to different contigs if(bPassedFilters && (pBWT != NULL && pRBWT != NULL && opt::maxKmerDepth > 0) && (record1.RefID != record2.RefID)) { int maxDepth1 = getMaxKmerDepth(record1.QueryBases, pBWT, pRBWT); int maxDepth2 = getMaxKmerDepth(record1.QueryBases, pBWT, pRBWT); if(maxDepth1 > opt::maxKmerDepth || maxDepth2 > opt::maxKmerDepth) { bPassedFilters = false; numPairsFilteredByDepth += 1; } } // Perform short-insert pair check if(pGraph != NULL) { bPassedFilters = bPassedFilters && filterByGraph(pGraph, referenceVector, record1, record2); numPairsFilteredByDistance += 1; } if(bPassedFilters) { pBamWriter->SaveAlignment(record1); pBamWriter->SaveAlignment(record2); numPairsWrote += 1; } } std::cout << "Total pairs: " << numPairsTotal << "\n"; std::cout << "Total pairs output: " << numPairsWrote << "\n"; std::cout << "Total filtered because one pair is unmapped: " << numPairsUnmapped << "\n"; std::cout << "Total filtered by distance: " << numPairsFilteredByDistance << "\n"; std::cout << "Total filtered by error rate: " << numPairsFilteredByER << "\n"; std::cout << "Total filtered by quality: " << numPairsFilteredByQuality << "\n"; std::cout << "Total filtered by depth: " << numPairsFilteredByDepth << "\n"; if(pGraph != NULL) delete pGraph; if(pBWT != NULL) delete pBWT; if(pRBWT != NULL) delete pRBWT; pBamWriter->Close(); pBamReader->Close(); delete pTimer; delete pBamReader; delete pBamWriter; return 0; }