/**
 * Map one top segment up to the genome named ancName and verify the result.
 *
 * Exactly one mapped segment is expected.  Its source must describe the
 * query segment (genome, start, length, orientation) and its target must
 * agree with the bottom segment reached by walking from the query segment
 * to its parent (with one additional parse-up/parent step when the first
 * step does not land in the requested genome).
 *
 * @param alignment Alignment used to open the ancestor genome.
 * @param top       Top segment being mapped.
 * @param ancName   Name of the genome to map into.
 */
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // The source side of the mapping must be the query segment itself.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Build the expected target by following parent links by hand.
  BottomSegmentIteratorConstPtr expected =
    ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  // extra hop for when top is in grand child
  if (expected->getGenome() != ancestor)
  {
    TopSegmentIteratorConstPtr parsed =
      expected->getGenome()->getTopSegmentIterator();
    parsed->toParseUp(expected);
    expected->toParent(parsed);
  }

  // The target side of the mapping must match the hand-built segment.
  CuAssertTrue(_testCase,
               hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase,
               hit->getReversed() == expected->getReversed());
}
/**
 * Map one top segment across to its sibling genome and verify the result.
 *
 * The sibling is "child2" when the segment lives in "child1", and "child1"
 * otherwise.  Exactly one mapped segment is expected; its source must
 * describe the query segment and its target must agree with the segment
 * obtained by going to the parent and back down into the sibling.
 *
 * @param alignment Alignment used to open the sibling genome.
 * @param top       Top segment being mapped.
 */
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* ancestor = top->getGenome()->getParent();

  // Pick whichever of the two children the query segment is NOT in.
  const Genome* sibling = NULL;
  if (top->getGenome()->getName() == "child1")
  {
    sibling = alignment->openGenome("child2");
  }
  else
  {
    sibling = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, sibling, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // The source side of the mapping must be the query segment itself.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Expected target: up to the parent, then down into the sibling.
  BottomSegmentIteratorConstPtr viaParent =
    ancestor->getBottomSegmentIterator();
  viaParent->toParent(top);
  TopSegmentIteratorConstPtr expected = sibling->getTopSegmentIterator();
  expected->toChildG(viaParent, sibling);

  CuAssertTrue(_testCase,
               hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase,
               hit->getReversed() == expected->getReversed());
}
/**
 * Copy every top segment of this genome into the corresponding top segment
 * of dest.
 *
 * Coordinates, the parent-reversed flag, the bottom parse index and the
 * next paralogy index are copied unchanged.  The parent index is re-based
 * per sequence: the offset of the parent bottom segment inside its sequence
 * (in this genome's parent) is re-applied to the sequence of the same name
 * in dest's parent, so the link stays correct even when the two parents
 * lay out their sequences differently.  A NULL_INDEX parent is copied as is.
 *
 * @param dest Genome to write into.  It must have either zero top segments
 *             (nothing is done) or exactly as many as this genome
 *             (asserted).
 */
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0)
  {
    // Nothing to do if there are no top segments.
    return;
  }

  BottomSegmentIteratorConstPtr inParentBottomSegIt =
    inParent->getBottomSegmentIterator();

  for (; static_cast<hal_size_t>(inTop->getArrayIndex()) < n;
       inTop->toRight(), outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    // NOTE: a per-segment check that the sequence containing this site has
    // the same name in both genomes (throwing hal_exception on mismatch)
    // used to run here.  It is disabled, so the two sequence-name lookups
    // that only fed that check are no longer performed.

    // Work out the parent index first.  By default it is copied verbatim;
    // when the segment has a parent, translate it through the sequence
    // name so that it points at the same place in dest's parent.
    hal_index_t parentIndex = inTop->getParentIndex();
    if (parentIndex != NULL_INDEX)
    {
      inParentBottomSegIt->toParent(inTop);
      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
      const Sequence *outParentSequence =
        outParent->getSequence(inParentSequence->getName());
      // The re-basing below only makes sense if dest's parent actually has
      // a sequence with this name.
      assert(outParentSequence != NULL);
      hal_index_t inParentSegmentOffset =
        parentIndex - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex =
        inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
      parentIndex = outParentSegmentIndex;
    }

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(parentIndex);
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());
  }
}
/**
 * Accumulate gap-insertion and substitution statistics for one gapped top
 * segment.
 *
 * If the gapped segment contains gaps, their total base count is added to
 * stats._gapInsertionLength, weighted by the number of gaps.  Then, for
 * every top segment between the left and right ends (inclusive) that has a
 * parent, the child and parent strings are compared base by base and each
 * position is counted as a transition, transversion, other substitution,
 * or match.  Positions where either base is missing data and none of the
 * substitution predicates holds are not counted.
 *
 * @param gappedTop Gapped top segment to summarize; must not be reversed
 *                  (asserted).
 * @param stats     Statistics object updated in place.
 */
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  // String buffers are declared once and refilled for every segment.
  string parentBases, childBases;
  TopSegmentIteratorConstPtr first = gappedTop->getLeft();
  TopSegmentIteratorConstPtr last = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentSeg =
    first->getTopSegment()->getGenome()->getParent()->getBottomSegmentIterator();

  for (TopSegmentIteratorConstPtr cur = first->copy();
       cur->getTopSegment()->getArrayIndex() <=
         last->getTopSegment()->getArrayIndex();
       cur->toRight())
  {
    if (!cur->hasParent())
    {
      // Nothing to compare against for segments without a parent.
      continue;
    }

    parentSeg->toParent(cur);
    cur->getString(childBases);
    parentSeg->getString(parentBases);
    assert(childBases.length() == parentBases.length());

    for (size_t pos = 0; pos < childBases.length(); ++pos)
    {
      if (isTransition(childBases[pos], parentBases[pos]))
      {
        ++stats._subs;
        ++stats._transitions;
      }
      else if (isTransversion(childBases[pos], parentBases[pos]))
      {
        ++stats._subs;
        ++stats._transversions;
      }
      else if (isSubstitution(childBases[pos], parentBases[pos]))
      {
        ++stats._subs;
      }
      else if (!(isMissingData(childBases[pos]) ||
                 isMissingData(parentBases[pos])))
      {
        ++stats._matches;
      }
    }
  }
}
/**
 * Check mapping between the two children of the duplication test alignment.
 *
 * After validating the alignment, two directions are tested using the top
 * segment each genome's getTopSegmentIterator() initially points at:
 *
 *  - child1 -> child2: the first mapped segment's source must describe the
 *    query segment and its target must match the segment reached by going
 *    to the parent and down into child2.
 *  - child2 -> child1: exactly three mapped segments are expected; each
 *    must have the query as its source and the right genome and length as
 *    its target, and together their array indices must cover 0, 1 and 2.
 *
 * @param alignment Alignment containing genomes "parent", "child1" and
 *                  "child2".
 */
void MappedSegmentMapDupeTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* parent = alignment->openGenome("parent");
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  // ---- child1 -> child2 ----
  TopSegmentIteratorConstPtr top = child1->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, child2, NULL, true);
  // CuAssertTrue(_testCase, results.size() == 3);
  // The exact-count check above is disabled, but results.begin() is
  // dereferenced right below, so at least require a non-empty result
  // instead of invoking undefined behaviour on an empty set.
  CuAssertTrue(_testCase, !results.empty());

  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase,
               mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == top->getReversed());

  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = child2->getTopSegmentIterator();
  sister->toChildG(bottom, child2);
  CuAssertTrue(_testCase,
               mseg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getLength() == sister->getLength());
  CuAssertTrue(_testCase,
               mseg->getReversed() == sister->getReversed());

  // ---- child2 -> child1 ----
  top = child2->getTopSegmentIterator();
  results.clear();
  top->getMappedSegments(results, child1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 3);

  bool found[3] = {false};
  for (set<MappedSegmentConstPtr>::iterator i = results.begin();
       i != results.end(); ++i)
  {
    MappedSegmentConstPtr mapped = *i;
    CuAssertTrue(_testCase,
                 mapped->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 mapped->getSource()->getStartPosition() ==
                 top->getStartPosition());
    CuAssertTrue(_testCase,
                 mapped->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase,
                 mapped->getSource()->getReversed() == top->getReversed());

    BottomSegmentIteratorConstPtr parentSeg =
      parent->getBottomSegmentIterator();
    parentSeg->toParent(top);
    // NOTE(review): the iterator is obtained from child2 but is then
    // positioned with toChildG(..., child1); kept exactly as before.
    TopSegmentIteratorConstPtr expected = child2->getTopSegmentIterator();
    expected->toChildG(parentSeg, child1);
    CuAssertTrue(_testCase,
                 mapped->getGenome() == expected->getGenome());
    CuAssertTrue(_testCase,
                 mapped->getLength() == expected->getLength());

    // found[] has three slots; fail the test rather than write outside it
    // if a mapped segment has an unexpected array index.
    CuAssertTrue(_testCase,
                 mapped->getArrayIndex() >= 0 && mapped->getArrayIndex() < 3);
    found[mapped->getArrayIndex()] = true;
  }

  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}