/**
 * Verifies the sequence exposed by a top-segment iterator of genome "Anc0".
 *
 * The iterator obtained with getTopSegmentIterator(100) must report start
 * position 500 and length 9, yield "CACACATTC" in its initial orientation,
 * and yield "GAATGTGTG" (the reverse complement of that string) after
 * toReverse() has been called on it.
 */
void TopSegmentSequenceTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancestor = alignment->openGenome("Anc0");
  TopSegmentIteratorConstPtr segIt = ancestor->getTopSegmentIterator(100);

  // Coordinates of the segment the iterator currently points at.
  CuAssertTrue(_testCase, segIt->getTopSegment()->getStartPosition() == 500);
  CuAssertTrue(_testCase, segIt->getTopSegment()->getLength() == 9);

  // Sequence as read in the iterator's initial orientation.
  string forwardSeq;
  segIt->getString(forwardSeq);
  CuAssertTrue(_testCase, forwardSeq == "CACACATTC");

  // Sequence as read after flipping the iterator.
  segIt->toReverse();
  string reverseSeq;
  segIt->getString(reverseSeq);
  CuAssertTrue(_testCase, reverseSeq == "GAATGTGTG");
}
/**
 * Updates stats with the gap insertions and base-level differences covered
 * by a gapped top-segment iterator.
 *
 * If the iterator reports any gaps, their total base count is added to
 * stats._gapInsertionLength together with the number of gaps.  Then every
 * top segment from the iterator's left end to its right end (by array
 * index, inclusive) that has a parent is compared, character by character,
 * against its parent bottom segment.  Each position is counted as a
 * transition, a transversion, another kind of substitution, or -- when
 * neither character is missing data -- a match.
 *
 * @param gappedTop  gapped iterator to scan; asserted not to be reversed
 * @param stats      counters that are incremented in place
 */
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(!gappedTop->getReversed());

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  string childSeq;
  string parentSeq;
  TopSegmentIteratorConstPtr leftEnd = gappedTop->getLeft();
  TopSegmentIteratorConstPtr rightEnd = gappedTop->getRight();
  const Genome* parentGenome =
    leftEnd->getTopSegment()->getGenome()->getParent();
  BottomSegmentIteratorConstPtr parentIt =
    parentGenome->getBottomSegmentIterator();

  // Walk a private copy so the caller's left iterator is left untouched.
  for (TopSegmentIteratorConstPtr cur = leftEnd->copy();
       cur->getTopSegment()->getArrayIndex() <=
         rightEnd->getTopSegment()->getArrayIndex();
       cur->toRight())
  {
    if (!cur->hasParent())
    {
      // Nothing to compare against for this segment.
      continue;
    }

    parentIt->toParent(cur);
    cur->getString(childSeq);
    parentIt->getString(parentSeq);
    assert(childSeq.length() == parentSeq.length());

    for (size_t pos = 0; pos < childSeq.length(); ++pos)
    {
      const char childBase = childSeq[pos];
      const char parentBase = parentSeq[pos];

      // The order of these checks matters: transitions and transversions
      // are tested before the generic substitution case.
      if (isTransition(childBase, parentBase))
      {
        ++stats._transitions;
        ++stats._subs;
      }
      else if (isTransversion(childBase, parentBase))
      {
        ++stats._transversions;
        ++stats._subs;
      }
      else if (isSubstitution(childBase, parentBase))
      {
        ++stats._subs;
      }
      else if (!isMissingData(childBase) && !isMissingData(parentBase))
      {
        ++stats._matches;
      }
    }
  }
}
// quickly count subsitutions without loading rearrangement machinery. // used for benchmarks for basic file scanning... and not much else since // the interface is still a bit wonky. void SummarizeMutations::substitutionAnalysis(const Genome* genome, MutationsStats& stats) { assert(stats._subs == 0); if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 || (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end())) { return; } const Genome* parent = genome->getParent(); string pname = parent != NULL ? parent->getName() : string(); StrPair branchName(genome->getName(), pname); BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator(); TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator(); string gString, cString; hal_size_t n = genome->getNumBottomSegments(); vector<hal_size_t> children; hal_size_t m = genome->getNumChildren(); for (hal_size_t i = 0; i < m; ++i) { string cName = genome->getChild(i)->getName(); if (!_targetSet || (_targetSet && _targetSet->find(cName) != _targetSet->end())) { children.push_back(i); } } if (children.empty()) { return; } for (hal_size_t i = 0; i < n; ++i) { bool readString = false; for (size_t j = 0; j < children.size(); ++j) { if (bottom->hasChild(children[j])) { if (readString == false) { bottom->getString(gString); readString = true; } top->toChild(bottom, children[j]); top->getString(cString); assert(gString.length() == cString.length()); for (hal_size_t k = 0; k < gString.length(); ++k) { if (isSubstitution(gString[k], cString[k])) { ++stats._subs; } } } } bottom->toRight(); } }