// Checks that GetFullPassSubreadIndices, applied to the subreadIntervals and
// adapterIntervals used by this test, yields exactly the indices 0..27 in
// ascending order.
TEST_F(RegionUtilTestFixture, GetFullPassSubreadIndices)
{
    vector<int> vi = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals);
    // vi = 0, ..., 27
    const int expectedCount = 28;

    // Fatal assertion on purpose: with a non-fatal EXPECT_EQ the loop below
    // would still run and index past the end of vi whenever fewer than
    // expectedCount indices are returned.
    ASSERT_EQ(static_cast<int>(vi.size()), expectedCount);

    for (int i = 0; i < expectedCount; i++) {
        EXPECT_EQ(vi[i], i);
    }
}
// Given a vector of subreads and a vector of adapters, return // index of the median length subread which has both // adapters before & after itself. If no full-pass subreads are // available, return -1. int GetMedianLengthFullSubreadIndex(std::vector<ReadInterval> &subreadIntervals, std::vector<ReadInterval> &adapterIntervals) { std::vector<int> indices = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals); if (indices.size() == 0) return -1; std::vector<std::pair<int, int>> indices_lens; for (int i = 0; i < static_cast<int>(indices.size()); i++) { ReadInterval &subread = subreadIntervals[indices[i]]; indices_lens.push_back(std::make_pair(indices[i], subread.end - subread.start)); } std::sort(indices_lens.begin(), indices_lens.end(), cmp_index_len_pair); return indices_lens[int(indices_lens.size() / 2)].first; }
// Given a vector of subreads and a vector of adapters, return // index of the typical fullpass subread which can represent subreads // of this zmw. // * if there is no fullpass subread, return -1; // * if number of fullpass subreads is less than 4, return index of the // left-most longest subread // * if number of fullpass subreads is greater than or equal 4, // * if length of the longest read does not exceed // meanLength + 1.96 * deviationLength // then, return index of the longest left-most subread // * otherwise, return index of the second longest left-most subread int GetTypicalFullSubreadIndex(std::vector<ReadInterval> &subreadIntervals, std::vector<ReadInterval> &adapterIntervals) { std::vector<int> indices = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals); if (indices.size() == 0) return -1; // no full-pass subread in this zmw std::vector<std::pair<int, int>> indices_lens; std::vector<int> lengths; for (int i = 0; i < static_cast<int>(indices.size()); i++) { ReadInterval &subread = subreadIntervals[indices[i]]; indices_lens.push_back(std::make_pair(indices[i], subread.end - subread.start)); lengths.push_back(subread.end - subread.start); } std::sort(indices_lens.begin(), indices_lens.end(), cmp_index_len_pair); int longestIndex = indices_lens[int(indices_lens.size() - 1)].first; int secondLongestIndex = (indices_lens.size() <= 1) ? (-1) : (indices_lens[int(indices_lens.size() - 2)].first); if (indices.size() < 4) { // very few fullpass subreads, use the longest subread anyway. return longestIndex; } else { // if length of the longest falls out of 95% CI of all other // fullpass subreads, use the second longest. sort(lengths.begin(), lengths.end()); float meanLength, varLength; MeanVar(lengths, meanLength, varLength); if (lengths[int(lengths.size() - 1)] > meanLength + 1.96 * sqrt(varLength)) { return secondLongestIndex; } else { return longestIndex; } } }