Ejemplo n.º 1
0
// Update the overlap block list with a righthand extension to b, removing ranges that become invalid
void OverlapAlgorithm::updateOverlapBlockRangesRight(const BWT* pBWT, const BWT* pRevBWT, 
                                                     OverlapBlockList& obList, char canonical_base) const
{
    OverlapBlockList::iterator iter = obList.begin(); 
    while(iter != obList.end())
    {
        char relative_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base;
        BWTAlgorithms::updateBothR(iter->ranges, relative_base, iter->getExtensionBWT(pBWT, pRevBWT));
        // remove the block from the list if its no longer valid
        if(!iter->ranges.isValid())
        {
            iter = obList.erase(iter);
        }
        else
        {
            // Add the base to the extension history
            int currExtension = iter->forwardHistory.size();
            iter->forwardHistory.add(currExtension, canonical_base);
            ++iter;
        }
    }
}
Ejemplo n.º 2
0
// Extend all the blocks in activeList by one base to the right
// Move all right-terminal blocks to the termainl list. If a block 
// is terminal and potentially contained by another block, add it to 
// containedList
void OverlapAlgorithm::extendActiveBlocksRight(const BWT* pBWT, const BWT* pRevBWT, 
                                               OverlapBlockList& activeList, 
                                               OverlapBlockList& terminalList,
                                               OverlapBlockList& /*containedList*/) const
{
    OverlapBlockList::iterator iter = activeList.begin();
    OverlapBlockList::iterator next;
    while(iter != activeList.end())
    {
        next = iter;
        ++next;

        // Check if block is terminal
        AlphaCount64 ext_count = iter->getCanonicalExtCount(pBWT, pRevBWT);
        if(ext_count.get('$') > 0)
        {
            // Only consider this block to be terminal irreducible if it has at least one extension
            // or else it is a substring block
            if(iter->forwardHistory.size() > 0)
            {
                OverlapBlock branched = *iter;
                BWTAlgorithms::updateBothR(branched.ranges, '$', branched.getExtensionBWT(pBWT, pRevBWT));
                terminalList.push_back(branched);
#ifdef DEBUGOVERLAP_2            
                std::cout << "Block of length " << iter->overlapLen << " moved to terminal\n";
#endif
            }
        }

        int curr_extension = iter->forwardHistory.size();

        // Perform the right extensions
        
        // Best case, there is only a single extension character
        // Handle this case specially so we don't need to copy the potentially
        // large OverlapBlock structure and its full history
        if(ext_count.hasUniqueDNAChar())
        {
            // Get the extension character with respect to the queried sequence
            char canonical_base = ext_count.getUniqueDNAChar();

            // Flip the base into the frame of reference for the block
            char block_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base;

            // Update the block using the base in its frame of reference
            BWTAlgorithms::updateBothR(iter->ranges, block_base, iter->getExtensionBWT(pBWT, pRevBWT));

            // Add the base to the history in the frame of reference of the query read
            // This is so the history is consistent when comparing between blocks from different strands
            iter->forwardHistory.add(curr_extension, canonical_base);
        }
        else
        {
            for(size_t idx = 0; idx < DNA_ALPHABET_SIZE; ++idx)
            {
                char canonical_base = ALPHABET[idx];
                char block_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base;
                if(ext_count.get(canonical_base) == 0)
                    continue;

                // Branch the sequence. This involves copying the entire history which can be large
                // if the input sequences are very long. This could be avoided by using the SearchHistoyNode/Link
                // structure but branches are infrequent enough to not have a large impact
                OverlapBlock branched = *iter;
                BWTAlgorithms::updateBothR(branched.ranges, block_base, branched.getExtensionBWT(pBWT, pRevBWT));
                assert(branched.ranges.isValid());

                // Add the base in the canonical frame
                branched.forwardHistory.add(curr_extension, canonical_base);

                // Insert the new block after the iterator
                activeList.insert(iter, branched);
            }

            // Remove the original block, which has been superceded by the branches
            activeList.erase(iter);
        }

        iter = next; // this skips the newly-inserted blocks
    }
}