Ejemplo n.º 1
0
// Exercises GetHighQualitySubreadsIntervals on three subread intervals with
// alternating directions: first with the default minimum interval length,
// then with a minimum interval length of 800.
//
// hqStart / hqEnd are not declared in this test; presumably they are members
// of RegionUtilTestFixture. The expected end of 3424 for the last interval
// (input end is 3452) suggests the HQ region ends at 3424 -- TODO confirm
// against the fixture.
TEST_F(RegionUtilTestFixture, GetHighQulitySubreadsIntervals) {
    vector<ReadInterval> intervals;
    intervals.push_back(ReadInterval(0, 712));
    intervals.push_back(ReadInterval(760, 2040));
    intervals.push_back(ReadInterval(2098, 3452));

    vector<int> directions;
    directions.push_back(0);
    directions.push_back(1);
    directions.push_back(0);

    // Expected contents after the first call: all three intervals survive and
    // only the end of the last one changes.
    int starts [3] = {0, 760, 2098};
    int ends   [3] = {712, 2040, 3424};
    int ds     [3] = {0, 1, 0};

    int indx = GetHighQualitySubreadsIntervals(intervals, directions, hqStart, hqEnd);
    // ASSERT (not EXPECT) on the sizes: the loop below indexes both vectors,
    // so continuing after a size mismatch would read out of bounds.
    ASSERT_EQ(intervals.size(), 3u);
    ASSERT_EQ(directions.size(), 3u);
    EXPECT_EQ(indx, 2);
    for(int i=0; i < 3; i++) {
        EXPECT_EQ(intervals[i].start, starts[i]);
        EXPECT_EQ(intervals[i].end  , ends[i]  );
        EXPECT_EQ(directions[i]     , ds[i]    );
    }

    // Second call, this time requiring a minimum interval length of 800.
    // The return value is not asserted for this call.
    indx = GetHighQualitySubreadsIntervals(intervals, directions, hqStart, hqEnd, 800);
    // The first interval and its direction have been removed as the length is
    // less than 800, so both vectors must now hold the last two entries.
    ASSERT_EQ(intervals.size(), 2u);
    ASSERT_EQ(directions.size(), 2u);
    for(int i=0; i < 2; i++) {
        EXPECT_EQ(intervals[i].start, starts[i+1]);
        EXPECT_EQ(intervals[i].end  , ends[i+1]  );
        EXPECT_EQ(directions[i]     , ds[i+1]    );
    }
}
//
// Point this iterator at a CCS sequence and its region table, then build
// the list of subread intervals (and one direction per interval) that the
// iterator will cover.
//
//   _seqPtr          sequence to iterate over; dereferenced immediately.
//   _regionTablePtr  region table used to look up the HQ region and to
//                    collect the subread intervals; dereferenced immediately.
//
// On return numPasses holds the number of intervals kept.  It stays 0 (with
// both vectors empty) when the hole has no HQ region or no subread
// intervals were collected.
//
void FragmentCCSIterator::
Initialize(CCSSequence *_seqPtr, RegionTable *_regionTablePtr) {
    // Reset every piece of iteration state up front so that the early
    // returns below leave the iterator empty.
    seqPtr         = _seqPtr;
    regionTablePtr = _regionTablePtr;
    curPass   = 0;
    numPasses = 0;
    subreadIntervals.clear();
    readIntervalDirection.clear();

    int hqRegionStart = 0;
    int hqRegionEnd   = 0;
    int hqRegionScore = 0;
    if (!LookupHQRegion(seqPtr->zmwData.holeNumber, *regionTablePtr,
                        hqRegionStart, hqRegionEnd, hqRegionScore)) {
        return; // Nothing to iterate over without an HQ region.
    }

    //
    // This iterator covers all passes, not only the ones that went into
    // the ccs, so the intervals have to come from the region table.
    //
    CollectSubreadIntervals(*seqPtr, regionTablePtr, subreadIntervals);
    const size_t numSubreads = subreadIntervals.size();
    if (numSubreads == 0) {
        return;
    }

    // Marker for "no direction known yet"; real directions are 0 or 1.
    const int UNASSIGNED_DIRECTION = 2;
    readIntervalDirection.assign(numSubreads, UNASSIGNED_DIRECTION);

    //
    // A subread takes the direction of the first pass whose start base
    // lies fewer than 10 positions away from the subread start.  Subreads
    // without such a pass keep the marker for now.
    //
    for (size_t s = 0; s < numSubreads; ++s) {
        const int subreadStart = (int)subreadIntervals[s].start;
        for (size_t p = 0; p < seqPtr->passStartBase.size(); ++p) {
            const int passStart = (int)seqPtr->passStartBase[p];
            if (abs(subreadStart - passStart) < 10) {
                readIntervalDirection[s] = seqPtr->passDirection[p];
                break;
            }
        }
    }

    // Find the first subread that received a direction from a pass.
    size_t firstAssigned = 0;
    for (; firstAssigned < numSubreads; ++firstAssigned) {
        if (readIntervalDirection[firstAssigned] != UNASSIGNED_DIRECTION) {
            break;
        }
    }

    if (firstAssigned == numSubreads) {
        // No subread matched any pass: guess direction 0 for the first one
        // and derive the rest from it.
        firstAssigned = 0;
        readIntervalDirection[0] = 0;
    }

    // numSubreads > 0 and firstAssigned < numSubreads both hold here, so
    // the anchor element always exists.
    const int anchorDirection = readIntervalDirection[firstAssigned];
    assert(anchorDirection == 0 || anchorDirection == 1);

    // Walk leftwards from the anchor, giving each interval the opposite
    // direction of its right-hand neighbour.
    for (size_t k = firstAssigned; k > 0; --k) {
        readIntervalDirection[k - 1] = (readIntervalDirection[k] == 0) ? 1 : 0;
    }

    // Walk rightwards from the anchor; intervals that already carry a 0/1
    // direction are left alone, the others get the opposite direction of
    // their left-hand neighbour.
    for (size_t k = firstAssigned + 1; k < numSubreads; ++k) {
        const int current = readIntervalDirection[k];
        if (current == 0 || current == 1) {
            continue;
        }
        readIntervalDirection[k] = (readIntervalDirection[k - 1] == 0) ? 1 : 0;
    }

    //
    // Every interval now has a direction, but an interval may lie partly
    // or entirely outside the HQ region.  Hand both vectors to
    // GetHighQualitySubreadsIntervals, which is expected to trim intervals
    // to the HQ region and drop the ones with no high quality part,
    // together with their directions.
    //
    GetHighQualitySubreadsIntervals(subreadIntervals,
                                    readIntervalDirection,
                                    hqRegionStart, hqRegionEnd);

    // The number of passes is whatever survived the filtering.
    numPasses = subreadIntervals.size();
}