예제 #1
0
// Checks that GetFullPassSubreadIndices(), applied to the fixture's
// subreadIntervals and adapterIntervals, yields exactly the indices
// 0, 1, ..., 27 in that order.
TEST_F(RegionUtilTestFixture, GetFullPassSubreadIndices ) {
    vector<int> vi = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals);
    // vi = 0, ..., 27
    // Fatal assertion on purpose: the loop below indexes vi[0..27], so the
    // test must stop here when the size is wrong instead of reading past the
    // end of the vector.
    ASSERT_EQ(vi.size(), 28u);
    for (int i = 0; i < 28; i++) {
        EXPECT_EQ(vi[i], i);
    }
}
// Given a vector of subreads and a vector of adapters, return
// index of the median length subread which has both
// adapters before & after itself. If no full-pass subreads are
// available, return -1.
int GetMedianLengthFullSubreadIndex(std::vector<ReadInterval> &subreadIntervals,
                                    std::vector<ReadInterval> &adapterIntervals)
{

    std::vector<int> indices = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals);
    if (indices.size() == 0) return -1;
    std::vector<std::pair<int, int>> indices_lens;

    for (int i = 0; i < static_cast<int>(indices.size()); i++) {
        ReadInterval &subread = subreadIntervals[indices[i]];
        indices_lens.push_back(std::make_pair(indices[i], subread.end - subread.start));
    }
    std::sort(indices_lens.begin(), indices_lens.end(), cmp_index_len_pair);
    return indices_lens[int(indices_lens.size() / 2)].first;
}
// Given a vector of subreads and a vector of adapters, return
// index of the typical fullpass subread which can represent subreads
// of this zmw.
// * if there is no fullpass subread, return -1;
// * if number of fullpass subreads is less than 4, return index of the
//   left-most longest subread
// * if number of fullpass subreads is greater than or equal 4,
//   * if length of the longest read does not exceed
//      meanLength + 1.96 * deviationLength
//     then, return index of the longest left-most subread
//   * otherwise, return index of the second longest left-most subread
int GetTypicalFullSubreadIndex(std::vector<ReadInterval> &subreadIntervals,
                               std::vector<ReadInterval> &adapterIntervals)
{

    std::vector<int> indices = GetFullPassSubreadIndices(subreadIntervals, adapterIntervals);
    if (indices.size() == 0) return -1;  // no full-pass subread in this zmw
    std::vector<std::pair<int, int>> indices_lens;
    std::vector<int> lengths;

    for (int i = 0; i < static_cast<int>(indices.size()); i++) {
        ReadInterval &subread = subreadIntervals[indices[i]];
        indices_lens.push_back(std::make_pair(indices[i], subread.end - subread.start));
        lengths.push_back(subread.end - subread.start);
    }

    std::sort(indices_lens.begin(), indices_lens.end(), cmp_index_len_pair);

    int longestIndex = indices_lens[int(indices_lens.size() - 1)].first;
    int secondLongestIndex =
        (indices_lens.size() <= 1) ? (-1) : (indices_lens[int(indices_lens.size() - 2)].first);

    if (indices.size() < 4) {
        // very few fullpass subreads, use the longest subread anyway.
        return longestIndex;
    } else {
        // if length of the longest falls out of 95% CI of all other
        // fullpass subreads, use the second longest.
        sort(lengths.begin(), lengths.end());
        float meanLength, varLength;
        MeanVar(lengths, meanLength, varLength);
        if (lengths[int(lengths.size() - 1)] > meanLength + 1.96 * sqrt(varLength)) {
            return secondLongestIndex;
        } else {
            return longestIndex;
        }
    }
}