예제 #1
0
// Classify the blocks in obList as irreducible, transitive or substrings. The irreducible blocks are
// put into pOBFinal. The remaining are discarded.
// Invariant: the blocks are ordered in descending order of the overlap size so that the longest overlap is first.
void OverlapAlgorithm::_processIrreducibleBlocksInexact(const BWT* pBWT, const BWT* pRevBWT, 
                                                        OverlapBlockList& activeList, 
                                                        OverlapBlockList* pOBFinal) const
{
    if(activeList.empty())
        return;
    
    // The activeList contains all the blocks that are not yet right terminal
    // Count the extensions in the top level (longest) blocks first
    bool all_eliminated = false;
    while(!activeList.empty() && !all_eliminated)
    {
        // The terminalBlock list contains all the blocks that became right-terminal
        // in the current extension round.
        OverlapBlockList terminalList;
        OverlapBlockList potentialContainedList;

        // Perform a single round of extension, any terminal blocks
        // are moved to the terminated list
        extendActiveBlocksRight(pBWT, pRevBWT, activeList, terminalList, potentialContainedList);

        // Compare the blocks in the contained list against the other terminal and active blocks
        // If they are a substring match to any of these, discard them
        OverlapBlockList::iterator containedIter = potentialContainedList.begin();
        for(; containedIter != potentialContainedList.end(); ++containedIter)
        {
           if(!isBlockSubstring(*containedIter, terminalList, m_errorRate) && 
              !isBlockSubstring(*containedIter, activeList, m_errorRate))
           {
                // Not a substring, move to terminal list
                terminalList.push_back(*containedIter);
                //std::cout << "Contained block kept: " << containedIter->overlapLen << "\n";
           }
           else
           {
                //std::cout << "Contained block found and removed: " << containedIter->overlapLen << "\n";
           }
        }

        // Using the terminated blocks, mark as eliminated any active blocks
        // that form a valid overlap to the terminal block. These are transitive edges
        // We do not compare two terminal blocks, we don't consider these overlaps to be
        // transitive
        OverlapBlockList::iterator terminalIter = terminalList.begin();
        for(; terminalIter != terminalList.end(); ++terminalIter)
        {
#ifdef DEBUGOVERLAP
            std::cout << "[II] ***TLB of length " << terminalIter->overlapLen << " has ended\n";
#endif       
            all_eliminated = true;
            OverlapBlockList::iterator activeIter = activeList.begin();
            for(; activeIter != activeList.end(); ++activeIter)
            {
                if(activeIter->isEliminated)
                    continue; // skip previously marked blocks
                
                // Two conditions must be met for a block to be transitive wrt terminal:
                // 1) It must have a strictly shorter overlap than the terminal block
                // 2) The error rate between the block and terminal must be less than the threshold
                double inferredErrorRate = calculateBlockErrorRate(*terminalIter, *activeIter);
                if(activeIter->overlapLen < terminalIter->overlapLen && 
                   isErrorRateAcceptable(inferredErrorRate, m_errorRate))
                {
#ifdef DEBUGOVERLAP_2                            
                    std::cout << "Marking block of length " << activeIter->overlapLen << " as eliminated\n";
#endif
                    activeIter->isEliminated = true;
                }
                else
                {
                    all_eliminated = false;
                }
            } 
            
            // Move this block to the final list if it has not been previously marked eliminated
            if(!terminalIter->isEliminated)
            {
#ifdef DEBUGOVERLAP
                std::cout << "[II] Adding block " << *terminalIter << " to final list\n";
                //std::cout << "  extension: " << terminalIter->forwardHistory << "\n";
#endif                
                pOBFinal->push_back(*terminalIter);
            }
        }
    }

    activeList.clear();
}
예제 #2
0
// Find the inexact overlaps of a read, first for its suffix and then for its prefix.
// Overlap blocks are written to pOBOut. When m_bIrreducible is set, the blocks found
// in each phase are passed through computeIrreducibleBlocks before being reported;
// otherwise they are spliced onto pOBOut unchanged.
// If any search reports failure, pOBOut is cleared and the returned result has
// isSubstring == false and searchAborted == true.
OverlapResult OverlapAlgorithm::overlapReadInexact(const SeqRecord& read, int minOverlap, OverlapBlockList* pOBOut) const
{
    OverlapResult outcome;
    OverlapBlockList pendingBlocks;
    std::string readSeq = read.seq.toString();

#ifdef DEBUGOVERLAP
    std::cout << "\n\n***Overlapping read " << read.id << " suffix\n";
#endif

    // Phase 1: match the suffix of the read against prefixes.
    // According to the original note, findOverlapBlocksInexact returns false when the
    // maximum search time was exceeded. Once that happens, every later step is skipped
    // (the && below short-circuits) and no overlaps are returned.
    bool ok = findOverlapBlocksInexact(readSeq, m_pBWT, m_pRevBWT, sufPreAF, 
                                       minOverlap, &pendingBlocks, pOBOut, outcome);

    ok = ok && findOverlapBlocksInexact(complement(readSeq), m_pRevBWT, m_pBWT, prePreAF, 
                                        minOverlap, &pendingBlocks, pOBOut, outcome);

    if(ok && m_bIrreducible)
    {
        computeIrreducibleBlocks(m_pBWT, m_pRevBWT, &pendingBlocks, pOBOut);
        pendingBlocks.clear();
    }
    else if(ok)
    {
        // Hand every working block over to the output list
        pOBOut->splice(pOBOut->end(), pendingBlocks);
        assert(pendingBlocks.empty());
    }

#ifdef DEBUGOVERLAP
    std::cout << "\n\n***Overlapping read " << read.id << " prefix\n";
#endif

    // Phase 2: match the prefix of the read against suffixes
    ok = ok && findOverlapBlocksInexact(reverseComplement(readSeq), m_pBWT, m_pRevBWT, sufSufAF, 
                                        minOverlap, &pendingBlocks, pOBOut, outcome);

    ok = ok && findOverlapBlocksInexact(reverse(readSeq), m_pRevBWT, m_pBWT, preSufAF, 
                                        minOverlap, &pendingBlocks, pOBOut, outcome);

    if(ok && m_bIrreducible)
    {
        computeIrreducibleBlocks(m_pBWT, m_pRevBWT, &pendingBlocks, pOBOut);
        pendingBlocks.clear();
    }
    else if(ok)
    {
        pOBOut->splice(pOBOut->end(), pendingBlocks);
        assert(pendingBlocks.empty());
    }

    if(ok)
        return outcome;

    // A search failed: drop any partial output and flag the result as aborted
    pOBOut->clear();
    outcome.isSubstring = false;
    outcome.searchAborted = true;
    return outcome;
}