// Clear the current classification and re-anchor the working iterators on
// topSegment.  _genome is taken from the segment and _parent from that
// genome; _cur is set to start at topSegment and _next, _left and _right
// are then made copies of it.  The trailing assertions verify that every
// iterator (including the parent-side ones, which are not moved here) still
// carries the gap threshold and atomic flag stored on this object.
void DefaultRearrangement::resetStatus(TopSegmentIteratorConstPtr topSegment)
{
  assert(topSegment.get());
  _id = Invalid;

  _genome = topSegment->getTopSegment()->getGenome();
  _parent = _genome->getParent();
  assert(_parent != NULL);

  // child-side iterators all start out on the same position
  _cur->setLeft(topSegment);
  _next->copy(_cur);
  _left->copy(_cur);
  _right->copy(_left);

  // child-side iterators: configuration must be untouched
  assert(_cur->getGapThreshold() == _gapThreshold);
  assert(_cur->getAtomic() == _atomic);
  assert(_next->getGapThreshold() == _gapThreshold);
  assert(_next->getAtomic() == _atomic);
  assert(_left->getGapThreshold() == _gapThreshold);
  assert(_left->getAtomic() == _atomic);
  assert(_right->getGapThreshold() == _gapThreshold);
  assert(_right->getAtomic() == _atomic);

  // parent-side iterators: same requirement
  assert(_leftParent->getGapThreshold() == _gapThreshold);
  assert(_leftParent->getAtomic() == _atomic);
  assert(_rightParent->getGapThreshold() == _gapThreshold);
  assert(_rightParent->getAtomic() == _atomic);
  assert(_curParent->getGapThreshold() == _gapThreshold);
  assert(_curParent->getAtomic() == _atomic);
}
// Validate the alignment, then run testTopSegment() against the genome named
// "parent" for: the first top segment of child2 and of child1 (whole, sliced
// by (1,2), and reversed), and every top segment of g1 (whole, sliced by
// (1,0), reversed, sliced by (0,1), and reversed again).
void MappedSegmentMapUpTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const string ancestorName("parent");

  const Genome* firstChild = alignment->openGenome("child1");
  const Genome* secondChild = alignment->openGenome("child2");

  // child2 is exercised before child1
  const Genome* visitOrder[2] = {secondChild, firstChild};
  TopSegmentIteratorConstPtr segIt;
  for (size_t g = 0; g < 2; ++g)
  {
    segIt = visitOrder[g]->getTopSegmentIterator();
    testTopSegment(alignment, segIt, ancestorName);
    segIt->slice(1, 2);
    testTopSegment(alignment, segIt, ancestorName);
    segIt->toReverse();
    testTopSegment(alignment, segIt, ancestorName);
  }

  const Genome* g1 = alignment->openGenome("g1");
  for (hal_size_t idx = 0; idx < g1->getNumTopSegments(); ++idx)
  {
    segIt = g1->getTopSegmentIterator(idx);
    testTopSegment(alignment, segIt, ancestorName);
    segIt->slice(1, 0);
    testTopSegment(alignment, segIt, ancestorName);
    segIt->toReverse();
    testTopSegment(alignment, segIt, ancestorName);
    segIt->slice(0, 1);
    testTopSegment(alignment, segIt, ancestorName);
    segIt->toReverse();
    testTopSegment(alignment, segIt, ancestorName);
  }
}
// Accumulate gap-insertion and per-base substitution statistics for one
// (forward-strand) gapped top segment.
//
// If the gapped segment contains gaps, their total base count is added to
// stats._gapInsertionLength, weighted by the number of gaps.  Then every top
// segment between the gapped iterator's left and right ends (inclusive, by
// array index) that has a parent is compared base-by-base with its parent
// string, and each column is counted as a transition, transversion, other
// substitution, or match (matches exclude columns with missing data).
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  TopSegmentIteratorConstPtr leftEnd = gappedTop->getLeft();
  TopSegmentIteratorConstPtr rightEnd = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentIt =
    leftEnd->getTopSegment()->getGenome()->getParent()->
    getBottomSegmentIterator();

  string parentBases;
  string childBases;
  TopSegmentIteratorConstPtr cursor = leftEnd->copy();
  while (cursor->getTopSegment()->getArrayIndex() <=
         rightEnd->getTopSegment()->getArrayIndex())
  {
    if (cursor->hasParent())
    {
      parentIt->toParent(cursor);
      cursor->getString(childBases);
      parentIt->getString(parentBases);
      assert(childBases.length() == parentBases.length());

      for (size_t col = 0; col < childBases.length(); ++col)
      {
        const char c = childBases[col];
        const char a = parentBases[col];
        if (isTransition(c, a))
        {
          ++stats._transitions;
          ++stats._subs;
        }
        else if (isTransversion(c, a))
        {
          ++stats._transversions;
          ++stats._subs;
        }
        else if (isSubstitution(c, a))
        {
          ++stats._subs;
        }
        else if (!isMissingData(c) && !isMissingData(a))
        {
          ++stats._matches;
        }
      }
    }
    cursor->toRight();
  }
}
// If true, _leftParent will store the swapped segment (and _cur will store) // the other half // NEED TO REVISE WITH STRONGER CRITERIA -- right now any operation // next to an endpoint can get confused with a translocation. bool DefaultRearrangement::scanTranslocationCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); bool first = _cur->isFirst(); bool last = _cur->isLast(); if (_cur->hasParent() == false || (!first && !last)) { return false; } _leftParent->toParent(_cur); bool pFirst = _leftParent->isFirst(); //bool pLast = _leftParent->isLast(); _rightParent->copy(_leftParent); first ? _right->toRight() : _right->toLeft(); pFirst ? _rightParent->toRight() : _rightParent->toLeft(); if (_right->hasParent() == false) { return true; } else { _curParent->toParent(_right); return _curParent->equals(_rightParent); } return false; }
void TopSegmentIteratorToSiteTest::checkGenome(const Genome* genome) { TopSegmentIteratorConstPtr ti = genome->getTopSegmentIterator(); for (hal_index_t pos = 0; pos < (hal_index_t)genome->getSequenceLength(); ++pos) { ti->toSite(pos); CuAssertTrue(_testCase, ti->getStartPosition() == pos); CuAssertTrue(_testCase, ti->getLength() == 1); ti->toSite(pos, false); CuAssertTrue(_testCase, pos >= ti->getStartPosition() && pos < ti->getStartPosition() + (hal_index_t)ti->getLength()); CuAssertTrue(_testCase, ti->getLength() == ti->getTopSegment()->getLength()); } }
// leaves duplication on _cur and _right bool DefaultRearrangement::scanDuplicationCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); return _cur->hasNextParalogy() == true && _cur->isCanonicalParalog() == false; }
// Map `top` to the genome named "parent" and check each mapped segment:
//  - source genome / target genome are as expected;
//  - no target position is reported twice across the result set;
//  - target and source coordinates coincide;
//  - mapping the mapped segment back to top's genome gives exactly one
//    segment whose coordinates (and source coordinates) match.
// NOTE(review): `covered` is sized by top->getLength() but indexed by the
// mapped segment's absolute position; this presumably relies on the test
// data keeping those positions below the length -- confirm.
void MappedSegmentParseTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top)
{
  const Genome* parent = alignment->openGenome("parent");
  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, parent, NULL, false);

  vector<bool> covered(top->getLength(), false);

  CuAssertTrue(_testCase, mapped.size() >= 1);
  set<MappedSegmentConstPtr>::iterator cur = mapped.begin();
  while (cur != mapped.end())
  {
    MappedSegmentConstPtr fwd = *cur;
    CuAssertTrue(_testCase,
                 fwd->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase, fwd->getGenome() == parent);

    // every position may be claimed by at most one mapped segment
    hal_index_t pos = fwd->getStartPosition();
    while (pos <= fwd->getEndPosition())
    {
      CuAssertTrue(_testCase, covered[pos] == false);
      covered[pos] = true;
      ++pos;
    }

    CuAssertTrue(_testCase, fwd->getStartPosition() ==
                 fwd->getSource()->getStartPosition());
    CuAssertTrue(_testCase, fwd->getEndPosition() ==
                 fwd->getSource()->getEndPosition());

    // round trip: map the result back onto the original genome
    set<MappedSegmentConstPtr> roundTrip;
    fwd->getMappedSegments(roundTrip, top->getGenome(), NULL, false);
    CuAssertTrue(_testCase, roundTrip.size() == 1);
    MappedSegmentConstPtr back = *roundTrip.begin();
    CuAssertTrue(_testCase, back->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 back->getSource()->getGenome() == fwd->getGenome());
    CuAssertTrue(_testCase,
                 back->getStartPosition() == fwd->getStartPosition());
    CuAssertTrue(_testCase,
                 back->getEndPosition() == fwd->getEndPosition());
    CuAssertTrue(_testCase, back->getSource()->getStartPosition() ==
                 fwd->getStartPosition());
    CuAssertTrue(_testCase, back->getSource()->getEndPosition() ==
                 fwd->getEndPosition());

    ++cur;
  }
}
// Map `top` up to the genome named ancName and check that exactly one
// mapped segment comes back, that its source describes `top`, and that its
// target matches the bottom segment reached by walking the parent links
// manually (with one parse-up hop when the first toParent() does not land
// in the requested ancestor).
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // source side must mirror the query iterator
  CuAssertTrue(_testCase, hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() ==
               top->getStartPosition());
  CuAssertTrue(_testCase, hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // expected target, computed by hand
  BottomSegmentIteratorConstPtr expected =
    ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  // extra hop for when top is in grand child
  if (expected->getGenome() != ancestor)
  {
    TopSegmentIteratorConstPtr hop =
      expected->getGenome()->getTopSegmentIterator();
    hop->toParseUp(expected);
    expected->toParent(hop);
  }

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
// Validate the alignment, then run testTopSegment() on the first top segment
// of child2 and then of child1: whole, sliced by (1,2), and reversed.
void MappedSegmentMapAcrossTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* firstChild = alignment->openGenome("child1");
  const Genome* secondChild = alignment->openGenome("child2");

  // child2 is exercised before child1
  const Genome* visitOrder[2] = {secondChild, firstChild};
  TopSegmentIteratorConstPtr segIt;
  for (size_t g = 0; g < 2; ++g)
  {
    segIt = visitOrder[g]->getTopSegmentIterator();
    testTopSegment(alignment, segIt);
    segIt->slice(1, 2);
    testTopSegment(alignment, segIt);
    segIt->toReverse();
    testTopSegment(alignment, segIt);
  }
}
// Assert (through testCase) that the top segment under `it` carries the same
// length, start position, next-paralogy index, parent index and bottom-parse
// index as the values recorded in this struct.
void TopSegmentStruct::compareTo(TopSegmentIteratorConstPtr it,
                                 CuTest* testCase) const
{
  const TopSegment* actual = it->getTopSegment();

  CuAssertTrue(testCase, actual->getLength() == _length);
  CuAssertTrue(testCase, actual->getStartPosition() == _startPosition);
  CuAssertTrue(testCase,
               actual->getNextParalogyIndex() == _nextParalogyIndex);
  CuAssertTrue(testCase, actual->getParentIndex() == _parentIndex);
  CuAssertTrue(testCase,
               actual->getBottomParseIndex() == _bottomParseIndex);
}
// Map `bottom` down to child number childIndex of its genome and check that
// exactly one mapped segment comes back, that its source describes `bottom`,
// and that its target matches the top segment reached with toChild().
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* target = bottom->getGenome()->getChild(childIndex);

  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, target, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // source side must mirror the query iterator
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == bottom->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() ==
               bottom->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == bottom->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == bottom->getReversed());

  // expected target, computed by following the child link directly
  TopSegmentIteratorConstPtr expected = target->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
// Classify topSegment (which must be on the forward strand) as an insertion.
// The segment qualifies when scanInsertionCycle() accepts it and the
// resulting _cur has no parent.  _id is set to Insertion on success and to
// Invalid otherwise; the return value mirrors that outcome.
bool DefaultRearrangement::identifyInsertionFromLeftBreakpoint(
  TopSegmentIteratorConstPtr topSegment)
{
  assert(topSegment->getReversed() == false);

  const bool isInsertion =
    scanInsertionCycle(topSegment) && !_cur->hasParent();
  if (isInsertion)
  {
    _id = Insertion;
  }
  else
  {
    _id = Invalid;
  }
  return isInsertion;
}
// Check top segment 100 of genome "Anc0": it must start at position 500,
// have length 9, read "CACACATTC" on the forward strand and "GAATGTGTG"
// after the iterator is reversed.
void TopSegmentSequenceTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("Anc0");
  TopSegmentIteratorConstPtr segIt = anc->getTopSegmentIterator(100);

  CuAssertTrue(_testCase, segIt->getTopSegment()->getStartPosition() == 500);
  CuAssertTrue(_testCase, segIt->getTopSegment()->getLength() == 9);

  string bases;
  segIt->getString(bases);
  CuAssertTrue(_testCase, bases == "CACACATTC");

  // reversed iterator yields the reverse complement
  segIt->toReverse();
  segIt->getString(bases);
  CuAssertTrue(_testCase, bases == "GAATGTGTG");
}
// Advance ts with toRight() while it sits on a parentless segment whose
// length does not exceed _gapThreshold.  The walk also stops, without
// moving, when ts is on the terminal segment for its orientation (the last
// segment when forward, the first when reversed).
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  TopSegmentIteratorConstPtr ts) const
{
  for (;;)
  {
    // not a gap: has a parent, or is too long to be treated as one
    if (ts->hasParent() || ts->getLength() > _gapThreshold)
    {
      return;
    }

    bool atTerminal;
    if (ts->getReversed())
    {
      atTerminal = ts->getTopSegment()->isFirst();
    }
    else
    {
      atTerminal = ts->getTopSegment()->isLast();
    }
    if (atTerminal)
    {
      return;
    }

    ts->toRight();
  }
}
// Segment is an inverted descendant of another Segment but // otherwise no rearrangement. bool DefaultRearrangement::scanInversionCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); bool first = _cur->isFirst(); bool last = _cur->isLast(); if (_cur->hasParent() == false) { return false; } _curParent->toParent(_cur); if (first == false) { _left->toLeft(); if (_left->hasParent() == false) { return false; } _leftParent->toParent(_left); if (_leftParent->adjacentTo(_curParent) == false) { return false; } } if (last == false) { _right->toRight(); if (_right->hasParent() == false) { return false; } _rightParent->toParent(_right); if (_rightParent->adjacentTo(_curParent) == false) { return false; } } return _cur->getParentReversed(); }
// Segment corresponds to no rearrangemnt. This will happen when // there is a rearrangement in the homolgous segment in its sibling // genome. In general, we can expect about half of segments to correspond // to such cases. bool DefaultRearrangement::scanNothingCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); bool first = _cur->isFirst(); bool last = _cur->isLast(); if (_cur->hasParent() == false) { return false; } _curParent->toParent(_cur); if (first == false) { _left->toLeft(); if (_left->hasParent() == false) { return false; } _leftParent->toParent(_left); if (_leftParent->adjacentTo(_curParent) == false) { return false; } if (_left->getParentReversed() == true) { if (_cur->getParentReversed() == false || _leftParent->rightOf(_curParent->getStartPosition()) == false) { return false; } } else { if (_cur->getParentReversed() == true || _leftParent->leftOf(_curParent->getStartPosition()) == false) { return false; } } } if (last == false) { _right->toRight(); if (_right->hasParent() == false) { return false; } _rightParent->toParent(_right); if (_rightParent->adjacentTo(_curParent) == false) { return false; } if (_right->getParentReversed() == true) { if (_cur->getParentReversed() == false || _rightParent->leftOf(_curParent->getStartPosition()) == false) { return false; } } else { if (_cur->getParentReversed() == true || _rightParent->rightOf(_curParent->getStartPosition()) == false) { return false; } } } return last && first ? _cur->getParentReversed() : true; }
void hal::validateDuplications(const Genome* genome) { const Genome* parent = genome->getParent(); if (parent == NULL) { return; } TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator(); TopSegmentIteratorConstPtr endIt = genome->getTopSegmentEndIterator(); vector<unsigned char> pcount(parent->getNumBottomSegments(), 0); for (; topIt != endIt; topIt->toRight()) { if (topIt->hasParent()) { if (pcount[topIt->getTopSegment()->getParentIndex()] < 250) { ++pcount[topIt->getTopSegment()->getParentIndex()]; } } } for (topIt = genome->getTopSegmentIterator(); topIt != endIt; topIt->toRight()) { if (topIt->hasParent()) { size_t count = pcount[topIt->getTopSegment()->getParentIndex()]; assert(count > 0); { if (topIt->hasNextParalogy() == false && count > 1) { stringstream ss; ss << "Top Segment " << topIt->getTopSegment()->getArrayIndex() << " in genome " << genome->getName() << " is not marked as a" << " duplication but it shares its parent " << topIt->getTopSegment()->getArrayIndex() << " with at least " << count - 1 << " other segments in the same genome"; throw hal_exception(ss.str()); } } } } }
void hal::validateTopSegment(const TopSegment* topSegment) { const Genome* genome = topSegment->getGenome(); hal_index_t index = topSegment->getArrayIndex(); if (index < 0 || index >= (hal_index_t)genome->getSequenceLength()) { stringstream ss; ss << "Segment out of range " << index << " in genome " << genome->getName(); throw hal_exception(ss.str()); } if (topSegment->getLength() < 1) { stringstream ss; ss << "Top segment " << index << " in genome " << genome->getName() << " has length 0 which is not currently supported"; throw hal_exception(ss.str()); } const Genome* parentGenome = genome->getParent(); const hal_index_t parentIndex = topSegment->getParentIndex(); if (parentGenome != NULL && parentIndex != NULL_INDEX) { if (parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments()) { stringstream ss; ss << "Parent index " << parentIndex << " of segment " << topSegment->getArrayIndex() << " out of range in genome " << parentGenome->getName(); throw hal_exception(ss.str()); } BottomSegmentIteratorConstPtr bottomSegmentIterator = parentGenome->getBottomSegmentIterator(parentIndex); const BottomSegment* parentSegment = bottomSegmentIterator->getBottomSegment(); if (topSegment->getLength() != parentSegment->getLength()) { stringstream ss; ss << "Parent length of segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has length " << parentSegment->getLength() << " which does not match " << topSegment->getLength(); throw hal_exception(ss.str()); } } const hal_index_t parseIndex = topSegment->getBottomParseIndex(); if (parseIndex == NULL_INDEX) { if (genome->getNumChildren() != 0) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has null parse index"; throw hal_exception(ss.str()); } } else { if (parseIndex >= (hal_index_t)genome->getNumBottomSegments()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has 
parse index out of range"; throw hal_exception(ss.str()); } hal_offset_t parseOffset = topSegment->getBottomParseOffset(); BottomSegmentIteratorConstPtr bottomSegmentIterator = genome->getBottomSegmentIterator(parseIndex); const BottomSegment* parseSegment = bottomSegmentIterator->getBottomSegment(); if (parseOffset >= parseSegment->getLength()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has parse offset out of range"; throw hal_exception(ss.str()); } if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != topSegment->getStartPosition()) { throw hal_exception("parse index broken in top segment in genome " + genome->getName()); } } const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex(); if (paralogyIndex != NULL_INDEX) { TopSegmentIteratorConstPtr pti = genome->getTopSegmentIterator(paralogyIndex); if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has parent index " << topSegment->getParentIndex() << ", but next paraglog " << topSegment->getNextParalogyIndex() << " has parent Index " << pti->getTopSegment()->getParentIndex() << ". Paralogous top segments must share same parent."; throw hal_exception(ss.str()); } if (paralogyIndex == topSegment->getArrayIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has paralogy index " << topSegment->getNextParalogyIndex() << " which isn't allowed"; throw hal_exception(ss.str()); } } }
// Check paralog handling of getMappedSegments() when mapping the first top
// segment of grandChild2 onto grandChild1:
//  1. with default arguments exactly one homology is expected;
//  2. with `root` passed as the coalescence limit three are expected, one
//     per target segment.
void MappedSegmentMapExtraParalogsTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome *grandChild1 = alignment->openGenome("grandChild1");
  const Genome *grandChild2 = alignment->openGenome("grandChild2");
  const Genome *root = alignment->openGenome("root");

  TopSegmentIteratorConstPtr top = grandChild2->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;

  // First, check that by default we will only get the homologies in
  // or before the MRCA. (in this case, just seg 0 of grandChild1).
  top->getMappedSegments(results, grandChild1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 1);
  MappedSegmentConstPtr mseg = *results.begin();
  // Source information should be preserved
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() ==
               top->getStartPosition());
  CuAssertTrue(_testCase,
               mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == top->getReversed());
  // Check target information is correct
  CuAssertTrue(_testCase, mseg->getGenome() == grandChild1);
  CuAssertTrue(_testCase, mseg->getStartPosition() == 2);
  CuAssertTrue(_testCase, mseg->getLength() == 3);
  CuAssertTrue(_testCase, mseg->getReversed() == true);

  // Check that by using the grandparent as the coalescence limit we
  // will get all the paralogs.
  top->getMappedSegments(results, grandChild1, NULL, true, 0, root);
  CuAssertTrue(_testCase, results.size() == 3);
  bool found[3] = {false, false, false};
  for (set<MappedSegmentConstPtr>::iterator i = results.begin();
       i != results.end(); ++i)
  {
    // examine the element under the iterator (without this assignment the
    // loop would keep re-checking the segment fetched in the first phase)
    mseg = *i;

    // Source information should be preserved
    CuAssertTrue(_testCase,
                 mseg->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getStartPosition() ==
                 top->getStartPosition());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase,
                 mseg->getSource()->getReversed() == top->getReversed());
    // Check target information is correct
    CuAssertTrue(_testCase, mseg->getGenome() == grandChild1);
    CuAssertTrue(_testCase,
                 mseg->getStartPosition() == 2 ||
                 mseg->getStartPosition() == 5 ||
                 mseg->getStartPosition() == 8);
    CuAssertTrue(_testCase, mseg->getLength() == 3);
    CuAssertTrue(_testCase, mseg->getReversed() == true);

    // record which target segment was hit; guard the write so an
    // unexpected index fails the test instead of writing out of bounds
    const hal_index_t targetIndex = mseg->getArrayIndex();
    CuAssertTrue(_testCase, targetIndex >= 0 && targetIndex < 3);
    if (targetIndex >= 0 && targetIndex < 3)
    {
      found[targetIndex] = true;
    }
  }

  // every one of the three target segments must have been reported
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}
// Walk gapped top ("child") and gapped bottom ("parent") iterators built
// with a very large gap threshold and check that each step spans a block of
// 20 consecutive segments.
//
// Sweep 1 starts at index 0: the forward iterators move with toRight(), the
// reversed copies with toLeft(); at each step the left/right ends are
// checked and the forward pair is also checked to map onto each other via
// toParent()/toChild().
// Sweep 2 starts at the final block and runs the other way: the forward
// iterators move with toLeft(), the reversed ones with toRight(), and the
// orientation flags are checked after each step.
void GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  GappedTopSegmentIteratorConstPtr fwdTop =
    child->getGappedTopSegmentIterator(0, 9999999);
  GappedBottomSegmentIteratorConstPtr fwdBottom =
    parent->getGappedBottomSegmentIterator(0, 0, 9999999);
  GappedTopSegmentIteratorConstPtr revTop =
    child->getGappedTopSegmentIterator(0, 9999999);
  revTop->toReverse();
  GappedBottomSegmentIteratorConstPtr revBottom =
    parent->getGappedBottomSegmentIterator(0, 0, 9999999);
  revBottom->toReverse();

  // ---- sweep 1: from the first block towards the end ----
  for (size_t first = 0; first < child->getNumTopSegments(); first += 20)
  {
    const size_t lastInBlock = first + 19;

    TopSegmentIteratorConstPtr topEnd = fwdTop->getLeft();
    CuAssertTrue(_testCase,
                 (size_t)topEnd->getTopSegment()->getArrayIndex() == first);
    topEnd = fwdTop->getRight();
    CuAssertTrue(_testCase,
                 (size_t)topEnd->getTopSegment()->getArrayIndex() ==
                 lastInBlock);

    BottomSegmentIteratorConstPtr bottomEnd = fwdBottom->getLeft();
    CuAssertTrue(_testCase,
                 (size_t)bottomEnd->getBottomSegment()->getArrayIndex() ==
                 first);
    bottomEnd = fwdBottom->getRight();
    CuAssertTrue(_testCase,
                 (size_t)bottomEnd->getBottomSegment()->getArrayIndex() ==
                 lastInBlock);

    // child block -> parent block (orientation normalised before compare)
    GappedBottomSegmentIteratorConstPtr viaParent = fwdBottom->copy();
    viaParent->toParent(fwdTop);
    if (viaParent->getReversed())
    {
      viaParent->toReverse();
    }
    CuAssertTrue(_testCase, viaParent->equals(fwdBottom));

    // parent block -> child block
    GappedTopSegmentIteratorConstPtr viaChild = fwdTop->copy();
    viaChild->toChild(fwdBottom);
    if (viaChild->getReversed())
    {
      viaChild->toReverse();
    }
    CuAssertTrue(_testCase, viaChild->equals(fwdTop));

    fwdTop->toRight();
    fwdBottom->toRight();

    // reversed iterators report their ends swapped
    TopSegmentIteratorConstPtr revTopEnd = revTop->getLeft();
    CuAssertTrue(_testCase,
                 (size_t)revTopEnd->getTopSegment()->getArrayIndex() ==
                 lastInBlock);
    revTopEnd = revTop->getRight();
    CuAssertTrue(_testCase,
                 (size_t)revTopEnd->getTopSegment()->getArrayIndex() ==
                 first);
    revTop->toLeft();

    BottomSegmentIteratorConstPtr revBottomEnd = revBottom->getLeft();
    CuAssertTrue(_testCase,
                 (size_t)revBottomEnd->getBottomSegment()->getArrayIndex() ==
                 lastInBlock);
    revBottomEnd = revBottom->getRight();
    CuAssertTrue(_testCase,
                 (size_t)revBottomEnd->getBottomSegment()->getArrayIndex() ==
                 first);
    revBottom->toLeft();
  }

  // ---- sweep 2: from the final block back towards the start ----
  fwdTop = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 20, 9999999);
  fwdBottom = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 20, 0, 9999999);
  revTop = child->getGappedTopSegmentIterator(
    child->getNumTopSegments() - 20, 9999999);
  revTop->toReverse();
  revBottom = parent->getGappedBottomSegmentIterator(
    child->getNumTopSegments() - 20, 0, 9999999);
  revBottom->toReverse();

  for (hal_index_t last = child->getNumTopSegments() - 1; last >= 0;
       last -= 20)
  {
    TopSegmentIteratorConstPtr topEnd = fwdTop->getLeft();
    CuAssertTrue(_testCase,
                 topEnd->getTopSegment()->getArrayIndex() == last - 19);
    topEnd = fwdTop->getRight();
    CuAssertTrue(_testCase,
                 topEnd->getTopSegment()->getArrayIndex() == last);
    CuAssertTrue(_testCase, fwdTop->getReversed() == false);
    fwdTop->toLeft();

    BottomSegmentIteratorConstPtr bottomEnd = fwdBottom->getLeft();
    CuAssertTrue(_testCase,
                 bottomEnd->getBottomSegment()->getArrayIndex() == last - 19);
    bottomEnd = fwdBottom->getRight();
    CuAssertTrue(_testCase,
                 bottomEnd->getBottomSegment()->getArrayIndex() == last);
    CuAssertTrue(_testCase, fwdBottom->getReversed() == false);
    fwdBottom->toLeft();

    TopSegmentIteratorConstPtr revTopEnd = revTop->getLeft();
    CuAssertTrue(_testCase,
                 revTopEnd->getTopSegment()->getArrayIndex() == last);
    revTopEnd = revTop->getRight();
    CuAssertTrue(_testCase,
                 revTopEnd->getTopSegment()->getArrayIndex() == last - 19);
    CuAssertTrue(_testCase, revTop->getReversed() == true);
    revTop->toRight();

    BottomSegmentIteratorConstPtr revBottomEnd = revBottom->getLeft();
    CuAssertTrue(_testCase,
                 revBottomEnd->getBottomSegment()->getArrayIndex() == last);
    revBottomEnd = revBottom->getRight();
    CuAssertTrue(_testCase,
                 revBottomEnd->getBottomSegment()->getArrayIndex() ==
                 last - 19);
    CuAssertTrue(_testCase, revBottom->getReversed() == true);
    revBottom->toRight();
  }
}
// Exercise plain top segment iteration over genome "Anc0":
//  1. segment-by-segment, left to right and right to left, comparing each
//     segment with the expected values stored in _topSegments;
//  2. base-by-base with a one-base slice, in four variants: forward
//     iterator moving right and moving left, reversed iterator moving with
//     toLeft() over increasing positions and with toRight() over decreasing
//     positions.
void TopSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* anc = alignment->openGenome("Anc0");
  CuAssertTrue(_testCase, anc->getNumTopSegments() == _topSegments.size());

  // --- whole segments, ascending ---
  TopSegmentIteratorConstPtr it = anc->getTopSegmentIterator(0);
  for (size_t seg = 0; seg < anc->getNumTopSegments(); ++seg)
  {
    CuAssertTrue(_testCase,
                 (size_t)it->getTopSegment()->getArrayIndex() == seg);
    _topSegments[seg].compareTo(it, _testCase);
    it->toRight();
  }

  // --- whole segments, descending ---
  it = anc->getTopSegmentIterator(anc->getNumTopSegments() - 1);
  for (hal_index_t seg = anc->getNumTopSegments() - 1; seg >= 0; --seg)
  {
    CuAssertTrue(_testCase, it->getTopSegment()->getArrayIndex() == seg);
    _topSegments[seg].compareTo(it, _testCase);
    it->toLeft();
  }

  // --- single bases, forward strand, ascending ---
  it = anc->getTopSegmentIterator(0);
  it->slice(0, it->getLength() - 1);
  for (hal_index_t base = 0; base < (hal_index_t)anc->getSequenceLength();
       ++base)
  {
    CuAssertTrue(_testCase, it->getLength() == 1);
    CuAssertTrue(_testCase, it->getStartPosition() == base);
    it->toRight(it->getStartPosition() + 1);
  }

  // --- single bases, forward strand, descending ---
  it = anc->getTopSegmentIterator(anc->getNumTopSegments() - 1);
  it->slice(it->getLength() - 1, 0);
  for (hal_index_t base = anc->getSequenceLength() - 1; base >= 0; --base)
  {
    CuAssertTrue(_testCase, it->getLength() == 1);
    CuAssertTrue(_testCase, it->getStartPosition() == base);
    it->toLeft(it->getStartPosition() - 1);
  }

  // --- single bases, reversed iterator, ascending positions ---
  it = anc->getTopSegmentIterator(0);
  it->toReverse();
  CuAssertTrue(_testCase, it->getReversed() == true);
  it->slice(it->getLength() - 1, 0);
  for (hal_index_t base = 0; base < (hal_index_t)anc->getSequenceLength();
       ++base)
  {
    CuAssertTrue(_testCase, it->getLength() == 1);
    CuAssertTrue(_testCase, it->getStartPosition() == base);
    it->toLeft(it->getStartPosition() + 1);
  }

  // --- single bases, reversed iterator, descending positions ---
  it = anc->getTopSegmentIterator(anc->getNumTopSegments() - 1);
  it->toReverse();
  it->slice(0, it->getLength() - 1);
  for (hal_index_t base = anc->getSequenceLength() - 1; base >= 0; --base)
  {
    CuAssertTrue(_testCase, it->getLength() == 1);
    CuAssertTrue(_testCase, it->getStartPosition() == base);
    it->toRight(it->getStartPosition() - 1);
  }
}
// quickly count subsitutions without loading rearrangement machinery. // used for benchmarks for basic file scanning... and not much else since // the interface is still a bit wonky. void SummarizeMutations::substitutionAnalysis(const Genome* genome, MutationsStats& stats) { assert(stats._subs == 0); if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 || (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end())) { return; } const Genome* parent = genome->getParent(); string pname = parent != NULL ? parent->getName() : string(); StrPair branchName(genome->getName(), pname); BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator(); TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator(); string gString, cString; hal_size_t n = genome->getNumBottomSegments(); vector<hal_size_t> children; hal_size_t m = genome->getNumChildren(); for (hal_size_t i = 0; i < m; ++i) { string cName = genome->getChild(i)->getName(); if (!_targetSet || (_targetSet && _targetSet->find(cName) != _targetSet->end())) { children.push_back(i); } } if (children.empty()) { return; } for (hal_size_t i = 0; i < n; ++i) { bool readString = false; for (size_t j = 0; j < children.size(); ++j) { if (bottom->hasChild(children[j])) { if (readString == false) { bottom->getString(gString); readString = true; } top->toChild(bottom, children[j]); top->getString(cString); assert(gString.length() == cString.length()); for (hal_size_t k = 0; k < gString.length(); ++k) { if (isSubstitution(gString[k], cString[k])) { ++stats._subs; } } } } bottom->toRight(); } }
// If true, _cur will store the insertion 'candidate' // It must be further verified that this segment has no parent to // distinguish between destination of transposition and insertion. bool DefaultRearrangement::scanInsertionCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); // eat up any adjacent insertions so they don't get double counted while (_next->hasParent() == false && _next->isLast() == false) { _right->copy(_next); _right->toRight(); if (_right->hasParent() == false) { _next->copy(_right); } else { break; } } _right->copy(_next); assert(_next->equals(_cur) || _next->hasParent() == false); bool first = _cur->isFirst(); bool last = _right->isLast(); if (first && last) { return false; } // Case 1a) current segment is left endpoint. we consider insertion // if right neighbour has parent if (first) { _right->toRight(); if (_cur->hasParent() == false) { return true; } else if (_right->hasParent()) { _curParent->toParent(_cur); _rightParent->toParent(_right); return _rightParent->adjacentTo(_curParent) == false; } } // Case 1b) current segment is right endpoint. 
we consider insertion // if left neighbour has parent else if (last) { _left->toLeft(); if (_cur->hasParent() == false) { return true; } else if (_left->hasParent()) { _curParent->toParent(_cur); _leftParent->toParent(_left); return _leftParent->adjacentTo(_curParent) == false; } } // Case 2) current segment has a left neigbhour and a right neigbour else { _left->toLeft(); _right->toRight(); if (_left->hasParent() == true && _right->hasParent() == true) { _leftParent->toParent(_left); _rightParent->toParent(_right); // Case 2a) Parents are adjacent if (_leftParent->adjacentTo(_rightParent)) { return true; } // Case 2b) Left parent is endpoint else if (_leftParent->isFirst() || _leftParent->isLast()) { return _leftParent->getSequence() == _rightParent->getSequence(); } // Case 2c) Right parent is endpoint else if (_rightParent->isFirst() || _rightParent->isLast()) { return _leftParent->getSequence() == _rightParent->getSequence(); } } } return false; }
void hal::validateBottomSegment(const BottomSegment* bottomSegment) { const Genome* genome = bottomSegment->getGenome(); hal_index_t index = bottomSegment->getArrayIndex(); if (index < 0 || index >= (hal_index_t)genome->getSequenceLength()) { stringstream ss; ss << "Bottom segment out of range " << index << " in genome " << genome->getName(); throw hal_exception(ss.str()); } if (bottomSegment->getLength() < 1) { stringstream ss; ss << "Bottom segment " << index << " in genome " << genome->getName() << " has length 0 which is not currently supported"; throw hal_exception(ss.str()); } hal_size_t numChildren = bottomSegment->getNumChildren(); for (hal_size_t child = 0; child < numChildren; ++child) { const Genome* childGenome = genome->getChild(child); const hal_index_t childIndex = bottomSegment->getChildIndex(child); if (childGenome != NULL && childIndex != NULL_INDEX) { if (childIndex >= (hal_index_t)childGenome->getNumTopSegments()) { stringstream ss; ss << "Child " << child << " index " <<childIndex << " of segment " << bottomSegment->getArrayIndex() << " out of range in genome " << childGenome->getName(); throw hal_exception(ss.str()); } TopSegmentIteratorConstPtr topSegmentIteratr = childGenome->getTopSegmentIterator(childIndex); const TopSegment* childSegment = topSegmentIteratr->getTopSegment(); if (childSegment->getLength() != bottomSegment->getLength()) { stringstream ss; ss << "Child " << child << " with index " << childSegment->getArrayIndex() << " and start position " << childSegment->getStartPosition() << " and sequence " << childSegment->getSequence()->getName() << " has length " << childSegment->getLength() << " but parent with index " << bottomSegment->getArrayIndex() << " and start position " << bottomSegment->getStartPosition() << " in sequence " << bottomSegment->getSequence()->getName() << " has length " << bottomSegment->getLength(); throw hal_exception(ss.str()); } if (childSegment->getNextParalogyIndex() == NULL_INDEX && 
childSegment->getParentIndex() != bottomSegment->getArrayIndex()) { throw hal_exception("parent / child index mismatch (parent=" + genome->getName() + " child=" + childGenome->getName()); } if (childSegment->getParentReversed() != bottomSegment->getChildReversed(child)) { throw hal_exception("parent / child reversal mismatch (parent=" + genome->getName() + " child=" + childGenome->getName()); } } } const hal_index_t parseIndex = bottomSegment->getTopParseIndex(); if (parseIndex == NULL_INDEX) { if (genome->getParent() != NULL) { stringstream ss; ss << "Bottom segment " << bottomSegment->getArrayIndex() << " in genome " << genome->getName() << " has null parse index"; throw hal_exception(ss.str()); } } else { if (parseIndex >= (hal_index_t)genome->getNumTopSegments()) { stringstream ss; ss << "BottomSegment " << bottomSegment->getArrayIndex() << " in genome " << genome->getName() << " has parse index " << parseIndex << " greater than the number of top segments, " << (hal_index_t)genome->getNumTopSegments(); throw hal_exception(ss.str()); } TopSegmentIteratorConstPtr parseIterator = genome->getTopSegmentIterator(parseIndex); const TopSegment* parseSegment = parseIterator->getTopSegment(); hal_offset_t parseOffset = bottomSegment->getTopParseOffset(); if (parseOffset >= parseSegment->getLength()) { stringstream ss; ss << "BottomSegment " << bottomSegment->getArrayIndex() << " in genome " << genome->getName() << " has parse offset, " << parseOffset << ", greater than the length of the segment, " << parseSegment->getLength(); throw hal_exception(ss.str()); } if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != bottomSegment->getStartPosition()) { throw hal_exception("parse index broken in bottom segment in genome " + genome->getName()); } } }
// If true, _leftParent will store the deletion 'candidate' // It must be further verified that this segment has no child to // distinguish between source of transposition and deletion. bool DefaultRearrangement::scanDeletionCycle( TopSegmentIteratorConstPtr topSegment) { assert(topSegment.get()); resetStatus(topSegment); assert(_atomic != true || _cur->getNumSegments() == 1); bool first = _cur->isFirst(); bool last = _cur->isLast(); if (_cur->hasParent() == false || (first && last)) { return false; } // Case 1) current segment is a right endpoint. we consider delection // if parent has neighbour // FIXME: the edge cases are probably very wrong. if (last) { _leftParent->toParent(_cur); if (_leftParent->isFirst() == false) { _leftParent->toLeft(); return true; } if (_leftParent->isLast() == false) { _leftParent->toRight(); return true; } } // Case 2) Try to find deletion cycle by going right-up-left-left-down else { _rightParent->toParent(_cur); // FIXME: the edge cases are probably very wrong. if (first) { return false; } _left->toLeft(); assert(_rightParent->getGapThreshold() == _gapThreshold); assert(_cur->getGapThreshold() == _gapThreshold); assert(_atomic != true || _rightParent->getNumSegments() == 1); assert(_atomic != true || _left->getNumSegments() == 1); if (_left->hasParent() == false) { return false; } _leftParent->toParent(_left); if (_leftParent->getSequence() == _rightParent->getSequence()) { // don't care about inversions // so we make sure left is left of right and they are both positive if (_leftParent->getReversed() == true) { _leftParent->toReverse(); } if (_rightParent->getReversed() == true) { _rightParent->toReverse(); } if (_rightParent->getLeftArrayIndex() < _leftParent->getLeftArrayIndex()) { swap(_leftParent, _rightParent); } if (_leftParent->isLast()) { return false; } _leftParent->toRight(); return _leftParent->adjacentTo(_rightParent); } } return false; }
void hal::validateSequence(const Sequence* sequence) { // Verify that the DNA sequence doesn't contain funny characters DNAIteratorConstPtr dnaIt = sequence->getDNAIterator(); hal_size_t length = sequence->getSequenceLength(); for (hal_size_t i = 0; i < length; ++i) { char c = dnaIt->getChar(); if (isNucleotide(c) == false) { stringstream ss; ss << "Non-nucleotide character discoverd at position " << i << " of sequence " << sequence->getName() << ": " << c; throw hal_exception(ss.str()); } } // Check the top segments if (sequence->getGenome()->getParent() != NULL) { hal_size_t totalTopLength = 0; TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator(); hal_size_t numTopSegments = sequence->getNumTopSegments(); for (hal_size_t i = 0; i < numTopSegments; ++i) { const TopSegment* topSegment = topIt->getTopSegment(); validateTopSegment(topSegment); totalTopLength += topSegment->getLength(); topIt->toRight(); } if (totalTopLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its top segments add up to " << totalTopLength; throw hal_exception(ss.str()); } } // Check the bottom segments if (sequence->getGenome()->getNumChildren() > 0) { hal_size_t totalBottomLength = 0; BottomSegmentIteratorConstPtr bottomIt = sequence->getBottomSegmentIterator(); hal_size_t numBottomSegments = sequence->getNumBottomSegments(); for (hal_size_t i = 0; i < numBottomSegments; ++i) { const BottomSegment* bottomSegment = bottomIt->getBottomSegment(); validateBottomSegment(bottomSegment); totalBottomLength += bottomSegment->getLength(); bottomIt->toRight(); } if (totalBottomLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its bottom segments add up to " << totalBottomLength; throw hal_exception(ss.str()); } } }
// Walks gapped top / bottom segment iterators over the "child" and "parent"
// genomes in both directions and on both strands. At every step the gapped
// iterator must cover exactly one segment (left == right) whose array index
// matches the expected position.
// NOTE(review): the parent iterators are positioned with the child's top
// segment count, so the fixture presumably has equally many parent bottom
// segments -- confirm against the test's createCallBack.
void GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t numTop = child->getNumTopSegments();
  const hal_size_t lastIndex = numTop - 1;
  const int wideGap = 9999999;

  // Pass 1: start at index 0. Forward iterators step right; the reversed
  // iterators reach the same increasing indices by stepping left.
  GappedTopSegmentIteratorConstPtr topFwd =
     child->getGappedTopSegmentIterator(0, wideGap);
  GappedBottomSegmentIteratorConstPtr botFwd =
     parent->getGappedBottomSegmentIterator(0, 0, wideGap);
  GappedTopSegmentIteratorConstPtr topRev =
     child->getGappedTopSegmentIterator(0, wideGap);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev =
     parent->getGappedBottomSegmentIterator(0, 0, wideGap);
  botRev->toReverse();

  for (size_t expected = 0; expected < numTop; ++expected)
  {
    TopSegmentIteratorConstPtr topLeft = topFwd->getLeft();
    CuAssertTrue(_testCase, topLeft->equals(topFwd->getRight()));
    CuAssertTrue(_testCase,
       (size_t)topLeft->getTopSegment()->getArrayIndex() == expected);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr botLeft = botFwd->getLeft();
    CuAssertTrue(_testCase, botLeft->equals(botFwd->getRight()));
    CuAssertTrue(_testCase,
       (size_t)botLeft->getBottomSegment()->getArrayIndex() == expected);
    botFwd->toRight();

    TopSegmentIteratorConstPtr topLeftRev = topRev->getLeft();
    CuAssertTrue(_testCase, topLeftRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase,
       (size_t)topLeftRev->getTopSegment()->getArrayIndex() == expected);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botLeftRev = botRev->getLeft();
    CuAssertTrue(_testCase, botLeftRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase,
       (size_t)botLeftRev->getBottomSegment()->getArrayIndex() == expected);
    botRev->toLeft();
  }

  // Pass 2: start at the last index and walk back down to 0, additionally
  // checking the strand reported by each gapped iterator.
  topFwd = child->getGappedTopSegmentIterator(lastIndex, wideGap);
  botFwd = parent->getGappedBottomSegmentIterator(lastIndex, 0, wideGap);
  topRev = child->getGappedTopSegmentIterator(lastIndex, wideGap);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(lastIndex, 0, wideGap);
  botRev->toReverse();

  for (hal_index_t expected = lastIndex; expected >= 0; --expected)
  {
    TopSegmentIteratorConstPtr topLeft = topFwd->getLeft();
    CuAssertTrue(_testCase, topLeft->equals(topFwd->getRight()));
    CuAssertTrue(_testCase,
       topLeft->getTopSegment()->getArrayIndex() == expected);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botLeft = botFwd->getLeft();
    CuAssertTrue(_testCase, botLeft->equals(botFwd->getRight()));
    CuAssertTrue(_testCase,
       botLeft->getBottomSegment()->getArrayIndex() == expected);
    CuAssertTrue(_testCase, botFwd->getReversed() == false);
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topLeftRev = topRev->getLeft();
    CuAssertTrue(_testCase, topLeftRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase,
       topLeftRev->getTopSegment()->getArrayIndex() == expected);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr botLeftRev = botRev->getLeft();
    CuAssertTrue(_testCase, botLeftRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase,
       botLeftRev->getBottomSegment()->getArrayIndex() == expected);
    CuAssertTrue(_testCase, botRev->getReversed() == true);
    botRev->toRight();
  }
}
// Exercises TopSegmentIterator::toParseUp() on the genomes "case1" to
// "case4". In each case the top iterator is parsed up from a bottom
// iterator, first on the whole bottom segment and then on a slice of it;
// the resulting top iterator must begin at the bottom iterator's start
// position (case 1 also requires matching lengths).
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1
  {
    const Genome* genome = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());

    botIt->slice(3, 1);
    topIt->toParseUp(botIt);
    // slicing 3 off the start and 1 off the end removes 4 bases in total
    CuAssertTrue(_testCase,
       botIt->getLength() == botIt->getBottomSegment()->getLength() - 4);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());
    CuAssertTrue(_testCase, botIt->getLength() == topIt->getLength());
  }

  // case 2
  {
    const Genome* genome = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(1, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 3
  {
    const Genome* genome = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator();

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 1);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());
  }

  // case 4
  {
    const Genome* genome = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr topIt = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr botIt = genome->getBottomSegmentIterator(1);

    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());

    botIt->slice(2, 2);
    topIt->toParseUp(botIt);
    CuAssertTrue(_testCase,
       botIt->getStartPosition() == topIt->getStartPosition());
  }
}
int main(int argc, char** argv) { CLParserPtr optionsParser = hdf5CLParserInstance(); optionsParser->setDescription("Rertrieve chain (pairwise alignment) " "information from a hal database.\n" "WARNING: THIS TOOL WAS NEVER FINISHED OR" " TESTED. USE AT OWN RISK. PLEASE " "CONSIDER halLiftover --outPSL INSTEAD."); optionsParser->addArgument("halFile", "path to hal file to analyze"); optionsParser->addArgument("genome", "(query) genome to process"); optionsParser->addOption("sequence", "sequence name in query genome (" "all sequences if not specified)", "\"\""); optionsParser->addOption("start", "start position in query genome", 0); optionsParser->addOption("length", "maximum length of chain to output.", 0); optionsParser->addOption("chainFile", "path for output file. stdout if not" " specified", "\"\""); optionsParser->addOption("maxGap", "maximum indel length to be considered a gap within" " a chain.", 20); string halPath; string chainPath; string genomeName; string sequenceName; hal_size_t start; hal_size_t length; hal_size_t maxGap; try { optionsParser->parseOptions(argc, argv); halPath = optionsParser->getArgument<string>("halFile"); genomeName = optionsParser->getArgument<string>("genome"); sequenceName = optionsParser->getOption<string>("sequence"); start = optionsParser->getOption<hal_size_t>("start"); length = optionsParser->getOption<hal_size_t>("length"); chainPath = optionsParser->getOption<string>("chainFile"); maxGap = optionsParser->getOption<hal_size_t>("maxGap"); } catch(exception& e) { cerr << e.what() << endl; optionsParser->printUsage(cerr); exit(1); } try { cerr << "WARNING: THIS TOOL WAS NEVER FINISHED OR TESTED. USE AT OWN RISK." << " PLEASE CONSIDER halLiftover --outPSL INSTEAD." 
<<endl; AlignmentConstPtr alignment = openHalAlignmentReadOnly(halPath, optionsParser); const Genome* genome = alignment->openGenome(genomeName); if (genome == NULL) { throw hal_exception(string("Genome not found: ") + genomeName); } hal_index_t endPosition = length > 0 ? start + length : genome->getSequenceLength(); const Sequence* sequence = NULL; if (sequenceName != "\"\"") { sequence = genome->getSequence(sequenceName); if (sequence == NULL) { throw hal_exception(string("Sequence not found: ") + sequenceName); } start += sequence->getStartPosition(); endPosition = length > 0 ? start + length : sequence->getSequenceLength(); } ofstream ofile; ostream& outStream = chainPath == "\"\"" ? cout : ofile; if (chainPath != "\"\"") { ofile.open(chainPath.c_str()); if (!ofile) { throw hal_exception(string("Error opening output file ") + chainPath); } } TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator(); top->toSite(start, false); // do slicing here; GappedTopSegmentIteratorConstPtr gtop = genome->getGappedTopSegmentIterator(top->getArrayIndex(), maxGap); // need to review! Chain chain; chain._id = 0; while (gtop->getRightArrayIndex() < (hal_index_t)genome->getNumTopSegments() && gtop->getLeft()->getStartPosition() < endPosition) { if (gtop->hasParent() == true) { hal_offset_t leftOffset = 0; if ((hal_index_t)start > gtop->getStartPosition() && (hal_index_t)start < gtop->getEndPosition()) { leftOffset = start - gtop->getStartPosition() ; } hal_offset_t rightOffset = 0; if (endPosition - 1 > gtop->getStartPosition() && endPosition - 1 < gtop->getEndPosition()) { rightOffset = gtop->getEndPosition() + 1 - endPosition; } // need to do offsets for edge cases gtIteratorToChain(gtop, chain, leftOffset, rightOffset); outStream << chain; ++chain._id; } gtop->toRight(); } } catch(hal_exception& e) { cerr << "hal exception caught: " << e.what() << endl; return 1; } catch(exception& e) { cerr << "Exception caught: " << e.what() << endl; return 1; } return 0; }
// Copies every top segment of this genome onto the corresponding top
// segment of dest: coordinates, parent reversal flag, bottom parse index,
// next paralogy index and parent index.
// The parent index is re-expressed relative to the sequence of the same
// name in dest's parent, so it stays correct when the two parent genomes
// lay out their sequences at different bottom segment offsets.
// dest must have either no top segments (nothing is copied) or exactly as
// many as this genome (asserted).
// Throws hal_exception if a referenced parent sequence does not exist in
// dest's parent.
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0) {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below.
  assert(inParent != NULL);
  assert(outParent != NULL);

  BottomSegmentIteratorConstPtr inParentBottomSegIt =
     inParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n; inTop->toRight(),
         outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    if (inTop->getParentIndex() == NULL_INDEX)
    {
      // No parent segment, so there is no index to translate.
      continue;
    }

    // Check that the sequences from the bottom segments we point to are the
    // same. If not, correct the indices so that they are.
    inParentBottomSegIt->toParent(inTop);
    const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
    const Sequence *outParentSequence =
       outParent->getSequence(inParentSequence->getName());
    if (outParentSequence == NULL)
    {
      // Fail with a diagnostic instead of dereferencing a null sequence.
      throw hal_exception(string("When copying top segments from ") +
                          getName() + " to " + dest->getName() +
                          ": parent sequence " +
                          inParentSequence->getName() +
                          " not found in genome " + outParent->getName());
    }

    // Keep the segment's offset within its sequence and rebase it onto the
    // first bottom segment of the matching sequence in dest's parent.
    hal_index_t inParentSegmentOffset =
       inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
    hal_index_t outParentSegmentIndex =
       inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
    outTop->setParentIndex(outParentSegmentIndex);
  }
}