void hal::validateDuplications(const Genome* genome) { const Genome* parent = genome->getParent(); if (parent == NULL) { return; } TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator(); TopSegmentIteratorConstPtr endIt = genome->getTopSegmentEndIterator(); vector<unsigned char> pcount(parent->getNumBottomSegments(), 0); for (; topIt != endIt; topIt->toRight()) { if (topIt->hasParent()) { if (pcount[topIt->getTopSegment()->getParentIndex()] < 250) { ++pcount[topIt->getTopSegment()->getParentIndex()]; } } } for (topIt = genome->getTopSegmentIterator(); topIt != endIt; topIt->toRight()) { if (topIt->hasParent()) { size_t count = pcount[topIt->getTopSegment()->getParentIndex()]; assert(count > 0); { if (topIt->hasNextParalogy() == false && count > 1) { stringstream ss; ss << "Top Segment " << topIt->getTopSegment()->getArrayIndex() << " in genome " << genome->getName() << " is not marked as a" << " duplication but it shares its parent " << topIt->getTopSegment()->getArrayIndex() << " with at least " << count - 1 << " other segments in the same genome"; throw hal_exception(ss.str()); } } } } }
/**
 * Accumulate gap-insertion and per-base substitution statistics for one
 * gapped top segment.
 *
 * If the gapped segment contains gaps, their total base count and number
 * are added to stats._gapInsertionLength.  Then every top segment from the
 * left end to the right end (by array index, inclusive) that has a parent
 * is compared base-by-base against its parent bottom segment, and each
 * column is classified as a transition, transversion, other substitution,
 * or match.
 *
 * @param gappedTop gapped top segment to summarise; must not be reversed
 *                  (asserted).
 * @param stats     counters updated in place.
 */
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  string parentBases;
  string childBases;
  TopSegmentIteratorConstPtr leftEnd = gappedTop->getLeft();
  TopSegmentIteratorConstPtr rightEnd = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentIt =
    leftEnd->getTopSegment()->getGenome()->getParent()
      ->getBottomSegmentIterator();

  // Walk a copy so the left-end iterator itself is left untouched.
  TopSegmentIteratorConstPtr cursor = leftEnd->copy();
  while (cursor->getTopSegment()->getArrayIndex() <=
         rightEnd->getTopSegment()->getArrayIndex())
  {
    if (!cursor->hasParent())
    {
      // Segments without a parent have nothing to compare against.
      cursor->toRight();
      continue;
    }

    parentIt->toParent(cursor);
    cursor->getString(childBases);
    parentIt->getString(parentBases);
    assert(childBases.length() == parentBases.length());

    for (size_t pos = 0; pos < childBases.length(); ++pos)
    {
      char childBase = childBases[pos];
      char parentBase = parentBases[pos];

      // The checks are ordered: the first category that applies wins.
      if (isTransition(childBase, parentBase))
      {
        ++stats._transitions;
        ++stats._subs;
      }
      else if (isTransversion(childBase, parentBase))
      {
        ++stats._transversions;
        ++stats._subs;
      }
      else if (isSubstitution(childBase, parentBase))
      {
        ++stats._subs;
      }
      else if (!isMissingData(childBase) && !isMissingData(parentBase))
      {
        ++stats._matches;
      }
    }

    cursor->toRight();
  }
}
/**
 * Move the top segment iterator `ts` to the right past gap segments.
 *
 * A segment is skipped while it has no parent and its length is at most
 * _gapThreshold.  The walk also stops, without moving further, when the
 * iterator sits on the final segment in its direction of travel: the last
 * segment when not reversed, the first segment when reversed.
 *
 * @param ts iterator to advance; it is moved via toRight().
 */
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  TopSegmentIteratorConstPtr ts) const
{
  for (;;)
  {
    // A segment with a parent is never a gap.
    if (ts->hasParent())
    {
      return;
    }
    // Parentless segments longer than the threshold are not gaps either.
    if (!(ts->getLength() <= _gapThreshold))
    {
      return;
    }

    // Which end counts as "the end" depends on the iterator's orientation.
    const bool atEnd = ts->getReversed() ? ts->getTopSegment()->isFirst()
                                         : ts->getTopSegment()->isLast();
    if (atEnd)
    {
      return;
    }

    ts->toRight();
  }
}