Example #1
0
/// Collects the subread intervals of one ZMW: the stretches of its HQ region
/// that lie before, between and after the adapters overlapping that region.
///
/// The emission rules intentionally mirror the C# DelimitedSeqRegions logic
/// (not src/SubreadConverter.cpp) so the two can be compared for verification.
///
/// @param intervals    Output; cleared first, then filled in read order. Left
///                     empty when the hole is unknown, has no HQ region, or the
///                     HQ region is empty/inverted.
/// @param regionTable  Table the hole's region annotations are read from.
/// @param holeNumber   ZMW hole number to look up.
void ComputeSubreadIntervals(vector<SubreadInterval>* const intervals,
                             RegionTable& regionTable,
                             const unsigned holeNumber)
{
    // Clear first so that every early return below yields an empty result.
    intervals->clear();

    // Same membership check LookupHQRegion performs before indexing the table;
    // indexing with an absent hole number is not known to be safe.
    if (!regionTable.HasHoleNumber(holeNumber))
        return;

    RegionAnnotations zmwRegions = regionTable[holeNumber];

    // No HQ region means no subreads (deliberately not an error).
    if (!zmwRegions.HasHQRegion())
        return;

    const size_t hqStart = zmwRegions.HQStart();
    const size_t hqEnd   = zmwRegions.HQEnd();

    // Empty or inverted HQ region: nothing to report.
    if (hqEnd <= hqStart)
        return;

    // The SubreadInterval arguments are (start, end, flag, flag); the two
    // flags presumably mean adapter-before / adapter-after -- TODO confirm
    // against the SubreadInterval declaration.
    //
    // NOTE(review): as in the mirrored C# logic, an interval following an
    // adapter is emitted without checking that it is non-empty, so an adapter
    // touching or overrunning the next boundary yields start >= end.
    const vector<ReadInterval> adapters = zmwRegions.AdapterIntervals();
    const ReadInterval* lastAdapter = nullptr;  // last adapter overlapping the HQ region
    for (const ReadInterval& adapter : adapters) {
        const size_t adapterStart = adapter.start;
        const size_t adapterEnd   = adapter.end;

        // Adapter lies entirely before the HQ region.
        if (adapterEnd < hqStart)
            continue;

        // Adapter lies beyond the HQ region; the early exit presumes the
        // adapters are ordered by start position.
        if (adapterStart > hqEnd)
            break;

        if (lastAdapter != nullptr)
            intervals->emplace_back(lastAdapter->end, adapterStart, true, true);
        else if (hqStart < adapterStart)
            intervals->emplace_back(hqStart, adapterStart, false, true);

        lastAdapter = &adapter;
    }

    // Trailing stretch up to the end of the HQ region. Without any adapter the
    // whole HQ region is one subread (hqStart < hqEnd is guaranteed above).
    if (lastAdapter != nullptr)
        intervals->emplace_back(lastAdapter->end, hqEnd, true, false);
    else
        intervals->emplace_back(hqStart, hqEnd, false, false);
}
// General functions.
bool LookupHQRegion(int holeNumber, RegionTable &regionTable, int &start, int &end, int &score)
{

    if (regionTable.HasHoleNumber(holeNumber)) {
        RegionAnnotations zmwRegions = regionTable[holeNumber];
        if (zmwRegions.HasHQRegion()) {
            start = zmwRegions.HQStart();
            end = zmwRegions.HQEnd();
            score = zmwRegions.HQScore();
            return true;
        }
    }

    start = end = score = 0;
    return false;
}
Example #3
0
/// Splits the HQ region of one ZMW into subreads and adapter pieces.
///
/// @param intervals    Output; cleared, then filled with the non-empty
///                     stretches of the HQ region before, between and after
///                     adapters, in read order.
/// @param adapters     Output; cleared, then filled with one entry per adapter
///                     that overlaps the HQ region, clipped to that region.
/// @param regionTable  Table the hole's region annotations are read from.
/// @param holeNumber   ZMW hole number to look up.
/// @param readLength   Length of the read; used only to repair an HQ end that
///                     is exactly one short of it.
/// @return The (possibly repaired) HQ interval, or SubreadInterval(0, 0) when
///         the hole is unknown, has no HQ region, or the HQ region is
///         empty/inverted.
static
SubreadInterval ComputeSubreadIntervals(deque<SubreadInterval>* const intervals,
                                        deque<SubreadInterval>* const adapters,
                                        RegionTable& regionTable,
                                        const unsigned holeNumber,
                                        const size_t readLength)
{
    // Clear both outputs first so every early return leaves them empty.
    intervals->clear();
    adapters->clear();

    // Indexing the table with an absent hole number is not known to be safe,
    // so check membership first and report an empty HQ interval.
    if (!regionTable.HasHoleNumber(holeNumber))
        return SubreadInterval(0, 0);

    // Region annotations of this ZMW.
    RegionAnnotations zmwRegions = regionTable[holeNumber];

    // No HQ region: nothing to split.
    if (!zmwRegions.HasHQRegion())
        return SubreadInterval(0, 0);

    const size_t hqStart = zmwRegions.HQStart();
    size_t       hqEnd   = zmwRegions.HQEnd();

    // Catch and repair 1-off errors in the HQ region: an end that stops one
    // short of the read length is extended to the read length.
    if (hqEnd == readLength - 1)
        hqEnd = readLength;

    // Catch empty or invalid HQ regions and return empty.
    if (hqEnd <= hqStart)
        return SubreadInterval(0, 0);

    // The third/fourth SubreadInterval arguments are passed as
    // "adapter before" / "adapter after" respectively.
    size_t subreadStart  = hqStart;  // where the next candidate subread begins
    bool   adapterBefore = false;    // becomes true once an adapter was consumed

    const vector<ReadInterval> adapterIntervals = zmwRegions.AdapterIntervals();
    for (const ReadInterval& adapter : adapterIntervals) {
        const size_t adapterStart = adapter.start;
        const size_t adapterEnd   = adapter.end;

        // If we're not in the HQ region yet, skip ahead.
        if (hqStart > adapterEnd)
            continue;

        // If the adapter is beyond the HQ region, we're done; the early exit
        // presumes the adapters are ordered by start position.
        if (hqEnd < adapterStart)
            break;

        // Save the subread preceding this adapter if it has length > 0.
        if (subreadStart < adapterStart)
            intervals->emplace_back(subreadStart, adapterStart, adapterBefore, true);

        // Save the part of the adapter that overlaps the HQ region.
        adapters->emplace_back(MAX3(adapterStart, hqStart, subreadStart),
                               min(adapterEnd, hqEnd));

        subreadStart  = adapterEnd;
        adapterBefore = true;
    }

    // Save any region between the last adapter and the end of the HQ region
    // as a subread.
    if (subreadStart < hqEnd)
        intervals->emplace_back(subreadStart, hqEnd, adapterBefore, false);

    return SubreadInterval(hqStart, hqEnd);
}