/**
 * Checks the coordinates reported by top segment iterators of genome
 * "child1" after they are positioned from bottom segment iterators that are
 * in forward orientation, sliced, or reversed.
 *
 * All expected positions, lengths and sequences below are specific to the
 * alignment handed in by the test fixture.
 *
 * @param alignment alignment containing the genomes "parent1" and "child1".
 */
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parentGenome = alignment->openGenome("parent1");
  const Genome* childGenome = alignment->openGenome("child1");

  TopSegmentIteratorConstPtr top = childGenome->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bottom =
    parentGenome->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr mappedTop = childGenome->getTopSegmentIterator();

  // Position the second top iterator on child 0 of the parent's bottom
  // segment.
  mappedTop->toChild(bottom, 0);

  // The untouched iterator is expected to be the forward segment [0, 10).
  CuAssertTrue(_testCase, top->getStartPosition() == 0);
  CuAssertTrue(_testCase, top->getLength() == 10);
  CuAssertTrue(_testCase, top->getReversed() == false);

  // The mapped iterator is expected to be reversed, starting at position 9.
  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 9);
  CuAssertTrue(_testCase, mappedTop->getLength() == 10);
  CuAssertTrue(_testCase, mappedTop->getReversed() == true);

  // Slice the bottom segment and map it down to the child again.
  bottom->slice(1, 3);
  mappedTop->toChild(bottom, 0);

  CuAssertTrue(_testCase, bottom->getStartPosition() == 1);
  CuAssertTrue(_testCase, bottom->getLength() == 6);
  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 8);
  CuAssertTrue(_testCase, mappedTop->getLength() == 6);

  // Compare the sequence seen through each of the two sliced iterators.
  string buffer;
  bottom->getString(buffer);
  CuAssertTrue(_testCase, buffer == "CCTACG");
  mappedTop->getString(buffer);
  CuAssertTrue(_testCase, buffer == "CACGTA");

  // From here on, parse up from the child's own bottom segments.
  bottom = childGenome->getBottomSegmentIterator();
  CuAssertTrue(_testCase, bottom->getReversed() == false);

  top->toParseUp(bottom);
  CuAssertTrue(_testCase, top->getStartPosition() == 0);
  CuAssertTrue(_testCase, top->getLength() == 5);

  // Same bottom segment, reversed: the expected start moves to 4.
  bottom->toReverse();
  top->toParseUp(bottom);
  CuAssertTrue(_testCase, top->getStartPosition() == 4);
  CuAssertTrue(_testCase, top->getLength() == 5);

  // Undo the reversal, then advance to the next bottom segment.
  bottom->toReverse();
  CuAssertTrue(_testCase, bottom->getReversed() == false);
  bottom->toRight();

  top->toParseUp(bottom);
  CuAssertTrue(_testCase, top->getStartPosition() == 5);
  CuAssertTrue(_testCase, top->getLength() == 5);

  // Next bottom segment, reversed: the expected start is 9.
  bottom->toReverse();
  top->toParseUp(bottom);
  CuAssertTrue(_testCase, top->getStartPosition() == 9);
  CuAssertTrue(_testCase, top->getLength() == 5);
}
/**
 * Adds the gap-insertion and per-base comparison counts for one gapped top
 * segment to the running statistics.
 *
 * When the gapped segment reports at least one gap, its number of gap bases
 * is added to stats._gapInsertionLength with the gap count as the second
 * argument. Then every top segment from the gapped iterator's left end to
 * its right end (compared by array index) that has a parent is compared,
 * character by character, with the parent's sequence.
 *
 * @param gappedTop gapped top segment iterator; asserted to be unreversed.
 * @param stats     accumulator whose _gapInsertionLength, _transitions,
 *                  _transversions, _subs and _matches members are updated.
 */
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  string parentBases, childBases;
  TopSegmentIteratorConstPtr leftTop = gappedTop->getLeft();
  TopSegmentIteratorConstPtr rightTop = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentBottom =
    leftTop->getTopSegment()->getGenome()->getParent()
      ->getBottomSegmentIterator();

  // Walk a copy of the left iterator rightwards until it has moved past the
  // array index of the right iterator.
  for (TopSegmentIteratorConstPtr cursor = leftTop->copy();
       cursor->getTopSegment()->getArrayIndex() <=
         rightTop->getTopSegment()->getArrayIndex();
       cursor->toRight())
  {
    if (!cursor->hasParent())
    {
      continue;
    }

    parentBottom->toParent(cursor);
    cursor->getString(childBases);
    parentBottom->getString(parentBases);
    assert(childBases.length() == parentBases.length());

    for (size_t pos = 0; pos < childBases.length(); ++pos)
    {
      const char childBase = childBases[pos];
      const char parentBase = parentBases[pos];

      // Transitions and transversions are also counted in _subs. A pair
      // that is none of the three substitution kinds counts as a match
      // only when neither base is missing data.
      if (isTransition(childBase, parentBase))
      {
        ++stats._transitions;
        ++stats._subs;
      }
      else if (isTransversion(childBase, parentBase))
      {
        ++stats._transversions;
        ++stats._subs;
      }
      else if (isSubstitution(childBase, parentBase))
      {
        ++stats._subs;
      }
      else if (!isMissingData(childBase) && !isMissingData(parentBase))
      {
        ++stats._matches;
      }
    }
  }
}
// quickly count subsitutions without loading rearrangement machinery. // used for benchmarks for basic file scanning... and not much else since // the interface is still a bit wonky. void SummarizeMutations::substitutionAnalysis(const Genome* genome, MutationsStats& stats) { assert(stats._subs == 0); if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 || (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end())) { return; } const Genome* parent = genome->getParent(); string pname = parent != NULL ? parent->getName() : string(); StrPair branchName(genome->getName(), pname); BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator(); TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator(); string gString, cString; hal_size_t n = genome->getNumBottomSegments(); vector<hal_size_t> children; hal_size_t m = genome->getNumChildren(); for (hal_size_t i = 0; i < m; ++i) { string cName = genome->getChild(i)->getName(); if (!_targetSet || (_targetSet && _targetSet->find(cName) != _targetSet->end())) { children.push_back(i); } } if (children.empty()) { return; } for (hal_size_t i = 0; i < n; ++i) { bool readString = false; for (size_t j = 0; j < children.size(); ++j) { if (bottom->hasChild(children[j])) { if (readString == false) { bottom->getString(gString); readString = true; } top->toChild(bottom, children[j]); top->getString(cString); assert(gString.length() == cString.length()); for (hal_size_t k = 0; k < gString.length(); ++k) { if (isSubstitution(gString[k], cString[k])) { ++stats._subs; } } } } bottom->toRight(); } }