// Update the overlap block list with a righthand extension to b, removing ranges that become invalid void OverlapAlgorithm::updateOverlapBlockRangesRight(const BWT* pBWT, const BWT* pRevBWT, OverlapBlockList& obList, char canonical_base) const { OverlapBlockList::iterator iter = obList.begin(); while(iter != obList.end()) { char relative_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base; BWTAlgorithms::updateBothR(iter->ranges, relative_base, iter->getExtensionBWT(pBWT, pRevBWT)); // remove the block from the list if its no longer valid if(!iter->ranges.isValid()) { iter = obList.erase(iter); } else { // Add the base to the extension history int currExtension = iter->forwardHistory.size(); iter->forwardHistory.add(currExtension, canonical_base); ++iter; } } }
// Extend all the blocks in activeList by one base to the right // Move all right-terminal blocks to the termainl list. If a block // is terminal and potentially contained by another block, add it to // containedList void OverlapAlgorithm::extendActiveBlocksRight(const BWT* pBWT, const BWT* pRevBWT, OverlapBlockList& activeList, OverlapBlockList& terminalList, OverlapBlockList& /*containedList*/) const { OverlapBlockList::iterator iter = activeList.begin(); OverlapBlockList::iterator next; while(iter != activeList.end()) { next = iter; ++next; // Check if block is terminal AlphaCount64 ext_count = iter->getCanonicalExtCount(pBWT, pRevBWT); if(ext_count.get('$') > 0) { // Only consider this block to be terminal irreducible if it has at least one extension // or else it is a substring block if(iter->forwardHistory.size() > 0) { OverlapBlock branched = *iter; BWTAlgorithms::updateBothR(branched.ranges, '$', branched.getExtensionBWT(pBWT, pRevBWT)); terminalList.push_back(branched); #ifdef DEBUGOVERLAP_2 std::cout << "Block of length " << iter->overlapLen << " moved to terminal\n"; #endif } } int curr_extension = iter->forwardHistory.size(); // Perform the right extensions // Best case, there is only a single extension character // Handle this case specially so we don't need to copy the potentially // large OverlapBlock structure and its full history if(ext_count.hasUniqueDNAChar()) { // Get the extension character with respect to the queried sequence char canonical_base = ext_count.getUniqueDNAChar(); // Flip the base into the frame of reference for the block char block_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base; // Update the block using the base in its frame of reference BWTAlgorithms::updateBothR(iter->ranges, block_base, iter->getExtensionBWT(pBWT, pRevBWT)); // Add the base to the history in the frame of reference of the query read // This is so the history is consistent when comparing between blocks from different strands iter->forwardHistory.add(curr_extension, canonical_base); } else { for(size_t idx = 0; idx < DNA_ALPHABET_SIZE; ++idx) { char canonical_base = ALPHABET[idx]; char block_base = iter->flags.isQueryComp() ? complement(canonical_base) : canonical_base; if(ext_count.get(canonical_base) == 0) continue; // Branch the sequence. This involves copying the entire history which can be large // if the input sequences are very long. This could be avoided by using the SearchHistoyNode/Link // structure but branches are infrequent enough to not have a large impact OverlapBlock branched = *iter; BWTAlgorithms::updateBothR(branched.ranges, block_base, branched.getExtensionBWT(pBWT, pRevBWT)); assert(branched.ranges.isValid()); // Add the base in the canonical frame branched.forwardHistory.add(curr_extension, canonical_base); // Insert the new block after the iterator activeList.insert(iter, branched); } // Remove the original block, which has been superceded by the branches activeList.erase(iter); } iter = next; // this skips the newly-inserted blocks } }