// NOTE: We have 2 different sets of offsets because the copied, new file differs in size than the existing one. // // Unsure which combination of write parameters were used on the original. Things like thread count, // compression level, etc. can effect compression ratio, BGZF block sizes, etc. even though the BAM record // content itself is equal. So we'll just track these index values separately, for now at least. // static PbiRawData Test2Bam_ExistingIndex(void) { PbiRawData index = Test2Bam_CoreIndexData(); index.BasicData().fileOffset_ = { 33816576, 33825163, 33831333, 33834264, 33836542, 33838065, 33849818, 33863499, 33874621, 1392836608 }; return index; }
// Expected raw index for the newly written copy of the BAM file: the shared
// core index data with the new file's record offsets applied.
static PbiRawData Test2Bam_NewIndex(void)
{
    PbiRawData fresh = Test2Bam_CoreIndexData();
    auto& basic = fresh.BasicData();
    basic.fileOffset_ = {
        33816576,  236126208,  391315456,  469106688,  537067520,
        587792384, 867303424, 1182793728, 1449787392, 1582628864
    };
    return fresh;
}
bool Accepts(const PbiRawData& idx, const size_t row) const { const auto& basicData = idx.BasicData(); // see if row's RGID known const auto& rgId = basicData.rgId_.at(row); const auto rgFound = lookup_.find(rgId); if (rgFound == lookup_.end()) return false; // see if row's ZMW known const auto& zmwPtr = rgFound->second; const auto zmw = basicData.holeNumber_.at(row); const auto zmwFound = zmwPtr->find(zmw); if (zmwFound == zmwPtr->end()) return false; // see if row's QueryStart/QueryEnd known // CCS names already covered in lookup construction phase const auto& queryIntervals = zmwFound->second; const auto qStart = basicData.qStart_.at(row); const auto qEnd = basicData.qEnd_.at(row); const auto queryInterval = std::make_pair(qStart, qEnd); return queryIntervals.find(queryInterval) != queryIntervals.end(); }
// Computes the query length (qEnd - qStart) of the record at 'row' and
// passes it to CompareHelper, whose result is returned.
bool PbiQueryLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    const auto& basic = idx.BasicData();

    const auto& queryStart = basic.qStart_.at(row);
    const auto& queryEnd = basic.qEnd_.at(row);

    const auto queryLength = queryEnd - queryStart;
    return CompareHelper(queryLength);
}
// Computes the aligned length (aEnd - aStart) of the record at 'row' and
// passes it to CompareHelper, whose result is returned.
bool PbiAlignedLengthFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    const auto& mapped = idx.MappedData();

    const auto& alignedEnd = mapped.aEnd_.at(row);
    const auto& alignedStart = mapped.aStart_.at(row);

    const auto alignedLength = alignedEnd - alignedStart;
    return CompareHelper(alignedLength);
}
// Constructs the private index representation from a raw PBI index.
//
// Every member is initialized directly from the corresponding accessor on
// 'rawIndex': filename, version, file sections, number of reads, and the
// basic, mapped, reference, and barcode data. The constructor body is empty;
// no work is done beyond these member initializations.
PbiIndexPrivate::PbiIndexPrivate(const PbiRawData& rawIndex) : filename_(rawIndex.Filename()) , version_(rawIndex.Version()) , sections_(rawIndex.FileSections()) , numReads_(rawIndex.NumReads()) , basicData_(rawIndex.BasicData()) , mappedData_(rawIndex.MappedData()) , referenceData_(rawIndex.ReferenceData()) , barcodeData_(rawIndex.BarcodeData()) { }
// Computes the identity of the record at 'row' as
//
//     1 - (mismatches + deleted bases + inserted bases) / (qEnd - qStart)
//
// and passes it to CompareHelper, whose result is returned.
bool PbiIdentityFilter::Accepts(const PbiRawData& idx, const size_t row) const
{
    // non-matching bases come from the mapped section
    const auto& mapped = idx.MappedData();
    const auto& mismatches = mapped.nMM_.at(row);
    const auto& indelCounts = mapped.NumDeletedAndInsertedBasesAt(row);  // (deleted, inserted)

    // query length comes from the basic section
    const auto& basic = idx.BasicData();
    const auto& queryStart = basic.qStart_.at(row);
    const auto& queryEnd = basic.qEnd_.at(row);
    const auto queryLength = queryEnd - queryStart;

    const auto nonMatching = mismatches + indelCounts.first + indelCounts.second;

    // NOTE(review): when qEnd == qStart the divisor below is zero - confirm
    //               that such records cannot reach this filter.
    const float identity =
        1.0 - (static_cast<float>(nonMatching) / static_cast<float>(queryLength));
    return CompareHelper(identity);
}
void PbiReferenceNameFilter::Initialize(const PbiRawData& idx) const { const auto pbiFilename = idx.Filename(); const auto bamFilename = pbiFilename.substr(0, pbiFilename.length() - 4); const auto bamFile = BamFile{ bamFilename }; // single-value if (rnameWhitelist_ == boost::none) { const auto tId = bamFile.ReferenceId(rname_); subFilter_ = PbiReferenceIdFilter{ tId, cmp_ }; } // multi-value whitelist else { subFilter_ = PbiFilter(PbiFilter::UNION); for (const auto& rname : rnameWhitelist_.get()) subFilter_.Add(PbiReferenceIdFilter{ bamFile.ReferenceId(rname) }); } initialized_ = true; }
// Builds the raw index data shared by the expected-index fixtures:
// version 3.0.1, BASIC | MAPPED | REFERENCE sections, and 10 reads.
static PbiRawData Test2Bam_CoreIndexData(void)
{
    PbiRawData index;

    // header
    index.Version(PbiFile::Version_3_0_1);
    index.FileSections(PbiFile::BASIC | PbiFile::MAPPED | PbiFile::REFERENCE);
    index.NumReads(10);

    // basic (per-read) section
    auto& basic = index.BasicData();
    basic.rgId_ = {
        -1197849594, -1197849594, -1197849594, -1197849594, -1197849594,
        -1197849594, -1197849594, -1197849594, -1197849594, -1197849594
    };
    basic.qStart_     = { 48, 387, 0, 9936, 10232, 7468, 5557, 7285, 426, 7064 };
    basic.qEnd_       = { 1132, 1134, 344, 10187, 10394, 8906, 7235, 8657, 1045, 7421 };
    basic.holeNumber_ = { 49050, 32328, 32328, 6469, 6469, 30983, 13473, 13473, 19915, 30983 };
    basic.readQual_   = { 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6 };
    basic.ctxtFlag_   = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    basic.fileOffset_ = {
        33816576, 33825163, 33831333, 33834264, 33836542,
        33838065, 33849818, 33863499, 33874621, 1392836608
    };

    // mapped section
    auto& mapped = index.MappedData();
    mapped.tId_       = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    mapped.tStart_    = { 0, 302, 675, 2170, 2203, 3572, 4506, 4507, 4592, 4669 };
    mapped.tEnd_      = { 471, 1019, 1026, 2397, 2326, 5015, 6125, 5850, 5203, 5011 };
    mapped.aStart_    = { 653, 395, 1, 9960, 10271, 7468, 5574, 7285, 441, 7075 };
    mapped.aEnd_      = { 1129, 1134, 344, 10185, 10394, 8906, 7235, 8647, 1040, 7418 };
    mapped.revStrand_ = { 0, 1, 0, 1, 0, 1, 1, 0, 1, 0 };
    mapped.nM_        = { 460, 704, 339, 216, 118, 1394, 1581, 1313, 583, 333 };
    mapped.nMM_       = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    mapped.mapQV_     = { 254, 254, 254, 254, 254, 254, 254, 254, 254, 254 };

    // reference section
    auto& reference = index.ReferenceData();
    reference.entries_ = {
        PbiReferenceEntry{ 0, 0, 10 },
        PbiReferenceEntry{ 4294967295, 4294967295, 4294967295 }
    };

    return index;
}
static void ExpectRawIndicesEqual(const PbiRawData& expected, const PbiRawData& actual) { // header data EXPECT_EQ(expected.Version(), actual.Version()); EXPECT_EQ(expected.FileSections(), actual.FileSections()); EXPECT_EQ(expected.NumReads(), actual.NumReads()); // subread data const PbiRawBasicData& e = expected.BasicData(); const PbiRawBasicData& a = actual.BasicData(); EXPECT_EQ(e.rgId_, a.rgId_); EXPECT_EQ(e.qStart_, a.qStart_); EXPECT_EQ(e.qEnd_, a.qEnd_); EXPECT_EQ(e.holeNumber_, a.holeNumber_); EXPECT_EQ(e.readQual_, a.readQual_); EXPECT_EQ(e.ctxtFlag_, a.ctxtFlag_); EXPECT_EQ(e.fileOffset_, a.fileOffset_); // mapped data EXPECT_EQ(expected.HasMappedData(), actual.HasMappedData()); if (expected.HasMappedData() && actual.HasMappedData()) { const PbiRawMappedData& e = expected.MappedData(); const PbiRawMappedData& a = actual.MappedData(); EXPECT_EQ(e.tId_, a.tId_); EXPECT_EQ(e.tStart_, a.tStart_); EXPECT_EQ(e.tEnd_, a.tEnd_); EXPECT_EQ(e.aStart_, a.aStart_); EXPECT_EQ(e.aEnd_, a.aEnd_); EXPECT_EQ(e.revStrand_, a.revStrand_); EXPECT_EQ(e.nM_, a.nM_); EXPECT_EQ(e.nMM_, a.nMM_); EXPECT_EQ(e.mapQV_, a.mapQV_); } // reference data EXPECT_EQ(expected.HasReferenceData(), actual.HasReferenceData()); if (expected.HasReferenceData() && actual.HasReferenceData()) { const PbiRawReferenceData& e = expected.ReferenceData(); const PbiRawReferenceData& a = actual.ReferenceData(); EXPECT_EQ(e.entries_, a.entries_); } // barcode data EXPECT_EQ(expected.HasBarcodeData(), actual.HasBarcodeData()); if (expected.HasBarcodeData() && actual.HasBarcodeData()) { const PbiRawBarcodeData& e = expected.BarcodeData(); const PbiRawBarcodeData& a = actual.BarcodeData(); EXPECT_EQ(e.bcForward_, a.bcForward_); EXPECT_EQ(e.bcReverse_, a.bcReverse_); EXPECT_EQ(e.bcQual_, a.bcQual_); } }