void ComputeSubreadIntervals(vector<SubreadInterval>* const intervals, RegionTable& regionTable, const unsigned holeNumber) { constexpr int RegionStart = RegionAnnotation::REGIONSTARTCOL; constexpr int RegionEnd = RegionAnnotation::REGIONENDCOL; // clear the input first intervals->clear(); RegionAnnotations zmwRegions = regionTable[holeNumber]; // Has non-empty HQRegion or not? if (not zmwRegions.HasHQRegion()) return; //throw runtime_error("could not find HQRegion for ZMW " + to_string(holeNumber)); size_t hqStart = zmwRegions.HQStart(); size_t hqEnd = zmwRegions.HQEnd(); if (hqEnd <= hqStart) return; // this logic mirrors that in the C# codebase for DelimitedSeqRegions rather // than what's in src/SubreadConverter.cpp for verification purposes ReadInterval const * lastAdapter = nullptr; bool prevIsAdapter = false; size_t regStart = hqStart; vector<ReadInterval> adapters = zmwRegions.AdapterIntervals(); for (size_t i = 0; i < adapters.size(); i++) { ReadInterval adapter = adapters[i]; size_t adapterStart = adapter.start; size_t adapterEnd = adapter.end; if (adapterEnd < hqStart) continue; if (adapterStart > hqEnd) break; if (prevIsAdapter) intervals->emplace_back(SubreadInterval(lastAdapter->end, adapterStart, true, true)); else if (regStart < adapterStart) intervals->emplace_back(SubreadInterval(regStart, adapterStart, false, true)); lastAdapter = &adapters[i]; prevIsAdapter = true; regStart = adapterEnd; } if (prevIsAdapter) intervals->emplace_back(SubreadInterval(lastAdapter->end, hqEnd, true, false)); else if (regStart < hqEnd) intervals->emplace_back(SubreadInterval(regStart, hqEnd, false, false)); }
// General functions. bool LookupHQRegion(int holeNumber, RegionTable ®ionTable, int &start, int &end, int &score) { if (regionTable.HasHoleNumber(holeNumber)) { RegionAnnotations zmwRegions = regionTable[holeNumber]; if (zmwRegions.HasHQRegion()) { start = zmwRegions.HQStart(); end = zmwRegions.HQEnd(); score = zmwRegions.HQScore(); return true; } } start = end = score = 0; return false; }
/// Splits the HQ region of one ZMW into subreads and adapter pieces.
///
/// @param intervals   Output; cleared first, then filled with every
///                    non-empty stretch of the HQ region that lies between
///                    adapters. Each entry records whether an adapter
///                    precedes it and whether one follows it.
/// @param adapters    Output; cleared first, then filled with the part of
///                    each adapter that overlaps the HQ region.
/// @param regionTable Region table that is indexed by `holeNumber`.
/// @param holeNumber  Hole number of the ZMW to process.
/// @param readLength  Length of the ZMW read; used only to repair an HQ end
///                    that is one short of the read length.
/// @return The (possibly repaired) HQ region as SubreadInterval(hqStart,
///         hqEnd), or SubreadInterval(0, 0) when the ZMW has no HQ region or
///         the region is empty/inverted.
static SubreadInterval ComputeSubreadIntervals(deque<SubreadInterval>* const intervals,
                                               deque<SubreadInterval>* const adapters,
                                               RegionTable& regionTable,
                                               const unsigned holeNumber,
                                               const size_t readLength)
{
    // clear the input first
    intervals->clear();
    adapters->clear();

    // region annotations of a zmw
    RegionAnnotations zmwRegions = regionTable[holeNumber];

    // Has non-empty HQregion or not?
    if (!zmwRegions.HasHQRegion()) return SubreadInterval(0, 0);

    const size_t hqStart = zmwRegions.HQStart();
    size_t hqEnd = zmwRegions.HQEnd();

    // Catch and repair 1-off errors in the HQ region
    if (hqEnd == readLength - 1) hqEnd = readLength;

    // Catch empty or invalid HQ regions and return empty
    if (hqEnd <= hqStart) return SubreadInterval(0, 0);

    // adapter intervals of this zmw
    const vector<ReadInterval> adapterIntervals = zmwRegions.AdapterIntervals();

    size_t subreadStart = hqStart;
    bool adapterBefore = false;

    for (const ReadInterval& adapter : adapterIntervals) {
        const size_t adapterStart = adapter.start;
        const size_t adapterEnd = adapter.end;

        // if we're not in the HQRegion yet, skip ahead
        if (hqStart > adapterEnd) continue;

        // if the adapter is beyond the HQRegion, we're done
        if (hqEnd < adapterStart) break;

        // If the subread is greater than length=0, save it
        if (subreadStart < adapterStart)
            intervals->emplace_back(subreadStart, adapterStart, adapterBefore, true);

        // Save the region of the adapter that overlaps the HQ region
        adapters->emplace_back(MAX3(adapterStart, hqStart, subreadStart),
                               min(adapterEnd, hqEnd));

        subreadStart = adapterEnd;
        adapterBefore = true;
    }

    // Save any region between the last adapter and the end of the HQ region
    // as a subread
    if (subreadStart < hqEnd)
        intervals->emplace_back(subreadStart, hqEnd, adapterBefore, false);

    return SubreadInterval(hqStart, hqEnd);
}