// Maps a top segment of one child genome onto its sibling ("child1" <->
// "child2") and checks the single mapped segment two ways: its source must
// mirror the input segment, and its target must match the segment reached by
// walking up to the parent and back down into the sibling.
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* srcGenome = top->getGenome();
  const Genome* parent = srcGenome->getParent();

  // The target is whichever of the two children the segment is not in.
  const Genome* other = NULL;
  if (srcGenome->getName() == "child1")
  {
    other = alignment->openGenome("child2");
  }
  else
  {
    other = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, other, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // Source side of the mapping must describe the segment we started from.
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == srcGenome);
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == top->getReversed());

  // Reference answer: top -> parent bottom segment -> sibling top segment.
  BottomSegmentIteratorConstPtr viaParent = parent->getBottomSegmentIterator();
  viaParent->toParent(top);
  TopSegmentIteratorConstPtr expected = other->getTopSegmentIterator();
  expected->toChildG(viaParent, other);

  // Target side of the mapping must agree with the reference answer.
  CuAssertTrue(_testCase, mseg->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, mseg->getLength() == expected->getLength());
  CuAssertTrue(_testCase, mseg->getReversed() == expected->getReversed());
}
// Copies every top segment of this genome into dest.
//
// dest must either have no top segments (in which case nothing is copied) or
// exactly as many as this genome; this is enforced with an assert only.
//
// For each segment the coordinates, parent-reversed flag, bottom parse index
// and next-paralogy index are copied verbatim. The parent index is rebased:
// it is re-expressed relative to the sequence of dest's parent that has the
// same name as the parent sequence in this genome's parent, so that it stays
// correct when the two parents lay out their sequences differently.
//
// NOTE: no check is made that the source and destination sequences at a given
// site carry the same name (that validation was already disabled here).
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0)
  {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below, so they must exist once there are
  // segments to copy.
  assert(inParent && outParent);

  BottomSegmentIteratorConstPtr inParentBottomSegIt =
    inParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n;
       inTop->toRight(), outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the
    // same. If not, correct the indices so that they are. Segments without a
    // parent keep NULL_INDEX unchanged.
    hal_index_t outParentSegmentIndex = inTop->getParentIndex();
    if (outParentSegmentIndex != NULL_INDEX)
    {
      inParentBottomSegIt->toParent(inTop);
      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
      const Sequence *outParentSequence =
        outParent->getSequence(inParentSequence->getName());
      // A parent sequence missing from dest's parent cannot be rebased.
      assert(outParentSequence);

      // Offset of the parent segment within its sequence, re-applied to the
      // start of the same-named sequence in the destination parent.
      hal_index_t inParentSegmentOffset =
        outParentSegmentIndex - inParentSequence->getBottomSegmentArrayIndex();
      outParentSegmentIndex =
        inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
    }
    outTop->setParentIndex(outParentSegmentIndex);
  }
}
void TopSegmentIteratorToSiteTest::checkGenome(const Genome* genome) { TopSegmentIteratorConstPtr ti = genome->getTopSegmentIterator(); for (hal_index_t pos = 0; pos < (hal_index_t)genome->getSequenceLength(); ++pos) { ti->toSite(pos); CuAssertTrue(_testCase, ti->getStartPosition() == pos); CuAssertTrue(_testCase, ti->getLength() == 1); ti->toSite(pos, false); CuAssertTrue(_testCase, pos >= ti->getStartPosition() && pos < ti->getStartPosition() + (hal_index_t)ti->getLength()); CuAssertTrue(_testCase, ti->getLength() == ti->getTopSegment()->getLength()); } }
// Maps a bottom segment down into the child at childIndex and checks the
// single mapped segment: its source must mirror the input bottom segment and
// its target must match the top segment reached directly via toChild().
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* child = bottom->getGenome()->getChild(childIndex);

  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, child, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // Source side: unchanged description of the segment we mapped from.
  CuAssertTrue(_testCase,
               mseg->getSource()->getGenome() == bottom->getGenome());
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() ==
                 bottom->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getSource()->getLength() == bottom->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == bottom->getReversed());

  // Reference answer: follow the child link directly.
  TopSegmentIteratorConstPtr expected = child->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);

  // Target side: must coincide with the directly-reached child segment.
  CuAssertTrue(_testCase, mseg->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, mseg->getLength() == expected->getLength());
  CuAssertTrue(_testCase, mseg->getReversed() == expected->getReversed());
}
// Maps a top segment up to the ancestor named ancName and checks the single
// mapped segment: its source must mirror the input segment and its target
// must match the bottom segment reached by walking parent links manually
// (one hop, or two when the ancestor is the grandparent).
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* parent = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, parent, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr mseg = *mapped.begin();

  // Source side: unchanged description of the segment we mapped from.
  CuAssertTrue(_testCase,
               mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == top->getReversed());

  // Reference answer: walk up by hand.
  BottomSegmentIteratorConstPtr expected = parent->getBottomSegmentIterator();
  expected->toParent(top);
  if (expected->getGenome() != parent)
  {
    // Extra hop for when top is in a grandchild: parse up within the
    // intermediate genome, then follow its parent link.
    TopSegmentIteratorConstPtr hop =
      expected->getGenome()->getTopSegmentIterator();
    hop->toParseUp(expected);
    expected->toParent(hop);
  }

  // Target side: must coincide with the manually-reached ancestor segment.
  CuAssertTrue(_testCase, mseg->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, mseg->getLength() == expected->getLength());
  CuAssertTrue(_testCase, mseg->getReversed() == expected->getReversed());
}
// Exercises TopSegmentIterator::toParseUp() on the four fixture genomes
// "case1".."case4". In every case the top iterator is parsed up from a bottom
// iterator, first unsliced and then after slicing the bottom iterator, and the
// resulting start positions (and, for case 1, lengths) must agree.
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1: first bottom segment, then sliced by (3, 1)
  {
    const Genome* genome = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());

    botIt->slice(3, 1);
    topIt->toParseUp(botIt);
    // The slice removes 3 + 1 bases from the underlying segment's length.
    CuAssertTrue(_testCase,
                 botIt->getLength() ==
                   botIt->getBottomSegment()->getLength() - 4);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());
  }

  // case 2: bottom segment 1, then sliced by (1, 1)
  {
    const Genome* genome = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(1, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 3: first bottom segment, then sliced by (2, 1)
  {
    const Genome* genome = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 4: bottom segment 1, then sliced by (2, 2)
  {
    const Genome* genome = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 2);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
                 botIt->getStartPosition() == topIt->getStartPosition());
  }
}
// Walks the top segments of "Anc0" in six passes and checks the iterator at
// every step:
//   1/2. whole segments, left-to-right then right-to-left, compared against
//        the expected records in _topSegments;
//   3/4. a length-1 window stepped across every site, forward strand;
//   5/6. the same single-site walk with the iterator reversed.
void TopSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* ancGenome = alignment->openGenome("Anc0");
  CuAssertTrue(_testCase,
               ancGenome->getNumTopSegments() == _topSegments.size());

  const hal_index_t lastSegment = ancGenome->getNumTopSegments() - 1;
  const hal_index_t numSites = (hal_index_t)ancGenome->getSequenceLength();
  const hal_index_t lastSite = ancGenome->getSequenceLength() - 1;

  // Pass 1: segment by segment, ascending array index.
  TopSegmentIteratorConstPtr tsIt = ancGenome->getTopSegmentIterator(0);
  size_t fwd = 0;
  while (fwd < ancGenome->getNumTopSegments())
  {
    CuAssertTrue(_testCase,
                 (size_t)tsIt->getTopSegment()->getArrayIndex() == fwd);
    _topSegments[fwd].compareTo(tsIt, _testCase);
    tsIt->toRight();
    ++fwd;
  }

  // Pass 2: segment by segment, descending array index.
  tsIt = ancGenome->getTopSegmentIterator(lastSegment);
  for (hal_index_t seg = lastSegment; seg >= 0; --seg)
  {
    CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == seg);
    _topSegments[seg].compareTo(tsIt, _testCase);
    tsIt->toLeft();
  }

  // Pass 3: single-site window moving right from the first site.
  tsIt = ancGenome->getTopSegmentIterator(0);
  tsIt->slice(0, tsIt->getLength() - 1);
  for (hal_index_t site = 0; site < numSites; ++site)
  {
    CuAssertTrue(_testCase, tsIt->getLength() == 1);
    CuAssertTrue(_testCase, tsIt->getStartPosition() == site);
    tsIt->toRight(tsIt->getStartPosition() + 1);
  }

  // Pass 4: single-site window moving left from the last site.
  tsIt = ancGenome->getTopSegmentIterator(lastSegment);
  tsIt->slice(tsIt->getLength() - 1, 0);
  for (hal_index_t site = lastSite; site >= 0; --site)
  {
    CuAssertTrue(_testCase, tsIt->getLength() == 1);
    CuAssertTrue(_testCase, tsIt->getStartPosition() == site);
    tsIt->toLeft(tsIt->getStartPosition() - 1);
  }

  // Pass 5: reversed iterator on the first segment; toLeft() is the call
  // that advances towards higher start positions here.
  tsIt = ancGenome->getTopSegmentIterator(0);
  tsIt->toReverse();
  CuAssertTrue(_testCase, tsIt->getReversed() == true);
  tsIt->slice(tsIt->getLength() - 1, 0);
  for (hal_index_t site = 0; site < numSites; ++site)
  {
    CuAssertTrue(_testCase, tsIt->getLength() == 1);
    CuAssertTrue(_testCase, tsIt->getStartPosition() == site);
    tsIt->toLeft(tsIt->getStartPosition() + 1);
  }

  // Pass 6: reversed iterator on the last segment; toRight() is the call
  // that advances towards lower start positions here.
  tsIt = ancGenome->getTopSegmentIterator(lastSegment);
  tsIt->toReverse();
  tsIt->slice(0, tsIt->getLength() - 1);
  for (hal_index_t site = lastSite; site >= 0; --site)
  {
    CuAssertTrue(_testCase, tsIt->getLength() == 1);
    CuAssertTrue(_testCase, tsIt->getStartPosition() == site);
    tsIt->toRight(tsIt->getStartPosition() - 1);
  }
}
// Checks how the coalescence limit passed to getMappedSegments() affects the
// paralogs returned when mapping the first top segment of "grandChild2" onto
// "grandChild1":
//   * with the default limit only one homology is expected;
//   * with "root" as the limit all three paralogs are expected.
// Every returned segment is verified individually, and each of the three
// target segments must be seen.
void MappedSegmentMapExtraParalogsTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome *grandChild1 = alignment->openGenome("grandChild1");
  const Genome *grandChild2 = alignment->openGenome("grandChild2");
  const Genome *root = alignment->openGenome("root");

  TopSegmentIteratorConstPtr top = grandChild2->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;

  // First, check that by default we will only get the homologies in
  // or before the MRCA. (in this case, just seg 0 of grandChild1).
  top->getMappedSegments(results, grandChild1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr first = *results.begin();

  // Source information should be preserved
  CuAssertTrue(_testCase, first->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               first->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase,
               first->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               first->getSource()->getReversed() == top->getReversed());

  // Check target information is correct
  CuAssertTrue(_testCase, first->getGenome() == grandChild1);
  CuAssertTrue(_testCase, first->getStartPosition() == 2);
  CuAssertTrue(_testCase, first->getLength() == 3);
  CuAssertTrue(_testCase, first->getReversed() == true);

  // Check that by using the grandparent as the coalescence limit we
  // will get all the paralogs.
  // NOTE(review): results is not cleared before this second call, as before;
  // the size check below expects exactly three entries overall — confirm
  // whether an explicit clear() is wanted.
  top->getMappedSegments(results, grandChild1, NULL, true, 0, root);
  CuAssertTrue(_testCase, results.size() == 3);

  bool found[3] = {false, false, false};
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    // Inspect the segment this iteration refers to (previously the first
    // result was re-checked on every pass).
    MappedSegmentConstPtr mseg = *i;

    // Source information should be preserved
    CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getStartPosition() == top->getStartPosition());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getReversed() == top->getReversed());

    // Check target information is correct
    CuAssertTrue(_testCase, mseg->getGenome() == grandChild1);
    CuAssertTrue(_testCase,
                 mseg->getStartPosition() == 2 ||
                 mseg->getStartPosition() == 5 ||
                 mseg->getStartPosition() == 8);
    CuAssertTrue(_testCase, mseg->getLength() == 3);
    CuAssertTrue(_testCase, mseg->getReversed() == true);

    // Guard the index before writing into the fixed-size array.
    CuAssertTrue(_testCase,
                 mseg->getArrayIndex() >= 0 && mseg->getArrayIndex() < 3);
    found[mseg->getArrayIndex()] = true;
  }

  // Each of the three paralogous target segments must have been returned.
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}
// Checks mapping across siblings in the presence of duplications.
//   * child1 -> child2: the first mapped segment must preserve its source
//     and match the segment reached via the parent.
//   * child2 -> child1: exactly three mapped segments are expected; each must
//     preserve its source, land in child1 with the expected length, and
//     between them they must cover child1 segments 0, 1 and 2.
void MappedSegmentMapDupeTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* parent = alignment->openGenome("parent");
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  // ---- child1 -> child2 ----
  TopSegmentIteratorConstPtr top = child1->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, child2, NULL, true);
  // CuAssertTrue(_testCase, results.size() == 3);
  // The exact count is not pinned here, but begin() is dereferenced next, so
  // at least one result is required.
  CuAssertTrue(_testCase, !results.empty());

  MappedSegmentConstPtr firstSeg = *results.begin();
  CuAssertTrue(_testCase,
               firstSeg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               firstSeg->getSource()->getStartPosition() ==
                 top->getStartPosition());
  CuAssertTrue(_testCase,
               firstSeg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               firstSeg->getSource()->getReversed() == top->getReversed());

  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = child2->getTopSegmentIterator();
  sister->toChildG(bottom, child2);

  CuAssertTrue(_testCase, firstSeg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase,
               firstSeg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase, firstSeg->getLength() == sister->getLength());
  CuAssertTrue(_testCase, firstSeg->getReversed() == sister->getReversed());

  // ---- child2 -> child1 ----
  top = child2->getTopSegmentIterator();
  results.clear();
  top->getMappedSegments(results, child1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 3);

  // Reference segment in child1, reached through the parent. It does not
  // depend on the individual result, so it is computed once. The iterator is
  // taken from child1 because that is the genome toChildG() targets.
  BottomSegmentIteratorConstPtr parentSeg = parent->getBottomSegmentIterator();
  parentSeg->toParent(top);
  TopSegmentIteratorConstPtr child1Seg = child1->getTopSegmentIterator();
  child1Seg->toChildG(parentSeg, child1);

  bool found[3] = {false, false, false};
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    MappedSegmentConstPtr mseg = *i;
    CuAssertTrue(_testCase,
                 mseg->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getStartPosition() ==
                   top->getStartPosition());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getReversed() == top->getReversed());

    CuAssertTrue(_testCase, mseg->getGenome() == child1Seg->getGenome());
    CuAssertTrue(_testCase, mseg->getLength() == child1Seg->getLength());

    // Guard the index before writing into the fixed-size array.
    CuAssertTrue(_testCase,
                 mseg->getArrayIndex() >= 0 && mseg->getArrayIndex() < 3);
    found[mseg->getArrayIndex()] = true;
  }

  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}