// Moves bs to the right until it rests on a segment that is not a gap.
// A segment is skipped only while it has no child in genome _childIndex
// AND its length does not exceed _gapThreshold.  The walk also stops when
// bs sits on the final segment in its direction of travel (the last
// segment when not reversed, the first segment when reversed).
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  for (;;)
  {
    // Aligned to the child, or too long to be treated as a gap: done.
    if (bs->hasChild(_childIndex) || bs->getLength() > _gapThreshold)
    {
      return;
    }

    // Nothing further to step onto in the current orientation.
    const bool atBoundary = bs->getReversed() ?
       bs->getBottomSegment()->isFirst() :
       bs->getBottomSegment()->isLast();
    if (atBoundary)
    {
      return;
    }

    bs->toRight();
  }
}
// Builds a gapped bottom-segment iterator anchored at `left`.
//
// left         - starting bottom segment iterator; must have zero start
//                and end offsets
// childIndex   - index of the child genome the gapped iterator refers to
// gapThreshold - maximum length of a segment that may be treated as a gap
// atomic       - asserted to imply gapThreshold == 0
//
// Throws hal_exception when `left` carries an offset or when the genome of
// `left` has no child at childIndex.  After the working iterators are set
// up, extendRight() is called.
DefaultGappedBottomSegmentIterator::DefaultGappedBottomSegmentIterator(
  BottomSegmentIteratorConstPtr left,
  hal_size_t childIndex,
  hal_size_t gapThreshold,
  bool atomic) :
  _childIndex(childIndex),
  _gapThreshold(gapThreshold),
  _atomic(atomic)
{
  const bool sliced =
     left->getStartOffset() != 0 || left->getEndOffset() != 0;
  if (sliced)
  {
    throw hal_exception("offset not currently supported in gapped iterators");
  }

  const Genome* parentGenome = left->getBottomSegment()->getGenome();
  const Genome* childGenome = parentGenome->getChild(_childIndex);
  if (childGenome == NULL)
  {
    throw hal_exception("can't init GappedBottomIterator with no child genome");
  }

  assert(!_atomic || _gapThreshold == 0);

  // Parent-side working iterators: all begin as copies of `left`.
  _left = left->copy();
  _right = left->copy();
  _temp = left->copy();
  _temp2 = left->copy();

  // Child-side working iterators: all begin as copies of the child's
  // default top segment iterator.
  _leftChild = childGenome->getTopSegmentIterator();
  _rightChild = _leftChild->copy();
  _leftDup = _leftChild->copy();
  _rightDup = _leftChild->copy();

  extendRight();
}
// Exercises TopSegmentIterator::toParseUp() on the genomes "case1" to
// "case4".  For every case the top iterator is parsed up from a whole
// bottom segment and then from a sliced one; start positions (and, for
// case 1, lengths) of the two iterators must agree afterwards.
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  // case 1
  {
    const Genome* genome = alignment->openGenome("case1");
    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();

    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
    CuAssertTrue(_testCase, bottom->getLength() == top->getLength());

    bottom->slice(3, 1);
    top->toParseUp(bottom);
    // 3 + 1 bases were trimmed from the underlying segment.
    CuAssertTrue(_testCase,
                 bottom->getLength() ==
                 bottom->getBottomSegment()->getLength() - 4);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
    CuAssertTrue(_testCase, bottom->getLength() == top->getLength());
  }

  // case 2
  {
    const Genome* genome = alignment->openGenome("case2");
    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr bottom =
       genome->getBottomSegmentIterator(1);

    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(1, 1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }

  // case 3
  {
    const Genome* genome = alignment->openGenome("case3");
    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator();

    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(2, 1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }

  // case 4
  {
    const Genome* genome = alignment->openGenome("case4");
    TopSegmentIteratorConstPtr top = genome->getTopSegmentIterator();
    BottomSegmentIteratorConstPtr bottom =
       genome->getBottomSegmentIterator(1);

    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(2, 2);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }
}
////////////////////////////////////////////////////////////////////////////// // INTERNAL METHODS ////////////////////////////////////////////////////////////////////////////// bool DefaultGappedBottomSegmentIterator::compatible( BottomSegmentIteratorConstPtr left, BottomSegmentIteratorConstPtr right) const { assert(left->hasChild(_childIndex) && right->hasChild(_childIndex)); assert(left->equals(right) == false); _leftChild->toChild(left, _childIndex); _rightChild->toChild(right, _childIndex); if (_leftChild->getTopSegment()->getParentReversed() != _rightChild->getTopSegment()->getParentReversed()) { return false; } if (_leftChild->hasNextParalogy() != _rightChild->hasNextParalogy()) { return false; } if ((!_leftChild->getReversed() && _leftChild->leftOf(_rightChild->getStartPosition()) == false) || (_leftChild->getReversed() && _leftChild->rightOf(_rightChild->getStartPosition()) == false)) { return false; } if (left->getBottomSegment()->getSequence() != right->getBottomSegment()->getSequence() || _leftChild->getTopSegment()->getSequence() != _rightChild->getTopSegment()->getSequence()) { return false; } while (true) { assert(_leftChild->isLast() == false); _leftChild->toRight(); if (_leftChild->hasParent() == true || _leftChild->getLength() > _gapThreshold) { if (_leftChild->equals(_rightChild)) { break; } else { return false; } } } _leftChild->toChild(left, _childIndex); _rightChild->toChild(right, _childIndex); if (_leftChild->hasNextParalogy() == true) { _leftDup->copy(_leftChild); _leftDup->toNextParalogy(); _rightDup->copy(_rightChild); _rightDup->toNextParalogy(); if ((_leftDup->getReversed() == false && _leftDup->leftOf(_rightDup->getStartPosition()) == false) || (_leftDup->getReversed() == true && _rightDup->leftOf(_leftDup->getStartPosition()) == false)) { return false; } if (_leftDup->getTopSegment()->getSequence() != _rightDup->getTopSegment()->getSequence()) { return false; } while (true) { assert(_leftDup->isLast() == false); _leftDup->toRight(); 
if (_leftDup->hasParent() == true || _leftDup->getLength() > _gapThreshold) { if (_leftDup->equals(_rightDup)) { break; } else { return false; } } } } return true; }
void hal::validateSequence(const Sequence* sequence) { // Verify that the DNA sequence doesn't contain funny characters DNAIteratorConstPtr dnaIt = sequence->getDNAIterator(); hal_size_t length = sequence->getSequenceLength(); for (hal_size_t i = 0; i < length; ++i) { char c = dnaIt->getChar(); if (isNucleotide(c) == false) { stringstream ss; ss << "Non-nucleotide character discoverd at position " << i << " of sequence " << sequence->getName() << ": " << c; throw hal_exception(ss.str()); } } // Check the top segments if (sequence->getGenome()->getParent() != NULL) { hal_size_t totalTopLength = 0; TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator(); hal_size_t numTopSegments = sequence->getNumTopSegments(); for (hal_size_t i = 0; i < numTopSegments; ++i) { const TopSegment* topSegment = topIt->getTopSegment(); validateTopSegment(topSegment); totalTopLength += topSegment->getLength(); topIt->toRight(); } if (totalTopLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its top segments add up to " << totalTopLength; throw hal_exception(ss.str()); } } // Check the bottom segments if (sequence->getGenome()->getNumChildren() > 0) { hal_size_t totalBottomLength = 0; BottomSegmentIteratorConstPtr bottomIt = sequence->getBottomSegmentIterator(); hal_size_t numBottomSegments = sequence->getNumBottomSegments(); for (hal_size_t i = 0; i < numBottomSegments; ++i) { const BottomSegment* bottomSegment = bottomIt->getBottomSegment(); validateBottomSegment(bottomSegment); totalBottomLength += bottomSegment->getLength(); bottomIt->toRight(); } if (totalBottomLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its bottom segments add up to " << totalBottomLength; throw hal_exception(ss.str()); } } }
void hal::validateTopSegment(const TopSegment* topSegment) { const Genome* genome = topSegment->getGenome(); hal_index_t index = topSegment->getArrayIndex(); if (index < 0 || index >= (hal_index_t)genome->getSequenceLength()) { stringstream ss; ss << "Segment out of range " << index << " in genome " << genome->getName(); throw hal_exception(ss.str()); } if (topSegment->getLength() < 1) { stringstream ss; ss << "Top segment " << index << " in genome " << genome->getName() << " has length 0 which is not currently supported"; throw hal_exception(ss.str()); } const Genome* parentGenome = genome->getParent(); const hal_index_t parentIndex = topSegment->getParentIndex(); if (parentGenome != NULL && parentIndex != NULL_INDEX) { if (parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments()) { stringstream ss; ss << "Parent index " << parentIndex << " of segment " << topSegment->getArrayIndex() << " out of range in genome " << parentGenome->getName(); throw hal_exception(ss.str()); } BottomSegmentIteratorConstPtr bottomSegmentIterator = parentGenome->getBottomSegmentIterator(parentIndex); const BottomSegment* parentSegment = bottomSegmentIterator->getBottomSegment(); if (topSegment->getLength() != parentSegment->getLength()) { stringstream ss; ss << "Parent length of segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has length " << parentSegment->getLength() << " which does not match " << topSegment->getLength(); throw hal_exception(ss.str()); } } const hal_index_t parseIndex = topSegment->getBottomParseIndex(); if (parseIndex == NULL_INDEX) { if (genome->getNumChildren() != 0) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has null parse index"; throw hal_exception(ss.str()); } } else { if (parseIndex >= (hal_index_t)genome->getNumBottomSegments()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has 
parse index out of range"; throw hal_exception(ss.str()); } hal_offset_t parseOffset = topSegment->getBottomParseOffset(); BottomSegmentIteratorConstPtr bottomSegmentIterator = genome->getBottomSegmentIterator(parseIndex); const BottomSegment* parseSegment = bottomSegmentIterator->getBottomSegment(); if (parseOffset >= parseSegment->getLength()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has parse offset out of range"; throw hal_exception(ss.str()); } if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != topSegment->getStartPosition()) { throw hal_exception("parse index broken in top segment in genome " + genome->getName()); } } const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex(); if (paralogyIndex != NULL_INDEX) { TopSegmentIteratorConstPtr pti = genome->getTopSegmentIterator(paralogyIndex); if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has parent index " << topSegment->getParentIndex() << ", but next paraglog " << topSegment->getNextParalogyIndex() << " has parent Index " << pti->getTopSegment()->getParentIndex() << ". Paralogous top segments must share same parent."; throw hal_exception(ss.str()); } if (paralogyIndex == topSegment->getArrayIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has paralogy index " << topSegment->getNextParalogyIndex() << " which isn't allowed"; throw hal_exception(ss.str()); } } }
// Walks gapped top/bottom iterators over the "child" and "parent" genomes
// and expects every gapped segment to consist of exactly one segment
// (left end equals right end).  A first pass starts at index 0 and visits
// indices in increasing order; a second pass starts at the last index and
// visits them in decreasing order.  Each pass drives one forward and one
// reversed iterator of each kind.
void GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  const hal_size_t numSegments = child->getNumTopSegments();
  const hal_index_t lastIndex = numSegments - 1;

  GappedTopSegmentIteratorConstPtr gtsIt =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr gbsIt =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr gtsItRev =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  gtsItRev->toReverse();
  GappedBottomSegmentIteratorConstPtr gbsItRev =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  gbsItRev->toReverse();

  // Increasing indices: forward iterators step right, reversed step left.
  for (size_t i = 0; i < numSegments; ++i)
  {
    TopSegmentIteratorConstPtr topLeft = gtsIt->getLeft();
    CuAssertTrue(_testCase, topLeft->equals(gtsIt->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topLeft->getTopSegment()->getArrayIndex()) == i);
    gtsIt->toRight();

    BottomSegmentIteratorConstPtr botLeft = gbsIt->getLeft();
    CuAssertTrue(_testCase, botLeft->equals(gbsIt->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botLeft->getBottomSegment()->getArrayIndex()) == i);
    gbsIt->toRight();

    TopSegmentIteratorConstPtr topLeftRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, topLeftRev->equals(gtsItRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topLeftRev->getTopSegment()->getArrayIndex()) == i);
    gtsItRev->toLeft();

    BottomSegmentIteratorConstPtr botLeftRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, botLeftRev->equals(gbsItRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botLeftRev->getBottomSegment()->getArrayIndex()) == i);
    gbsItRev->toLeft();
  }

  // Restart every iterator on the last segment.
  gtsIt = child->getGappedTopSegmentIterator(lastIndex, gapThreshold);
  gbsIt = parent->getGappedBottomSegmentIterator(lastIndex, 0, gapThreshold);
  gtsItRev = child->getGappedTopSegmentIterator(lastIndex, gapThreshold);
  gtsItRev->toReverse();
  gbsItRev = parent->getGappedBottomSegmentIterator(lastIndex, 0,
                                                    gapThreshold);
  gbsItRev->toReverse();

  // Decreasing indices: forward iterators step left, reversed step right.
  for (hal_index_t i = lastIndex; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr topLeft = gtsIt->getLeft();
    CuAssertTrue(_testCase, topLeft->equals(gtsIt->getRight()));
    CuAssertTrue(_testCase, topLeft->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !gtsIt->getReversed());
    gtsIt->toLeft();

    BottomSegmentIteratorConstPtr botLeft = gbsIt->getLeft();
    CuAssertTrue(_testCase, botLeft->equals(gbsIt->getRight()));
    CuAssertTrue(_testCase,
                 botLeft->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !gbsIt->getReversed());
    gbsIt->toLeft();

    TopSegmentIteratorConstPtr topLeftRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase, topLeftRev->equals(gtsItRev->getRight()));
    CuAssertTrue(_testCase,
                 topLeftRev->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gtsItRev->getReversed());
    gtsItRev->toRight();

    BottomSegmentIteratorConstPtr botLeftRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase, botLeftRev->equals(gbsItRev->getRight()));
    CuAssertTrue(_testCase,
                 botLeftRev->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, gbsItRev->getReversed());
    gbsItRev->toRight();
  }
}
// Walks gapped top/bottom iterators over the "child" and "parent" genomes
// and expects every gapped segment to span 20 consecutive array indices
// (i .. i + 19).  The first pass visits the blocks in increasing order and
// additionally checks that mapping a gapped top segment to its parent, and
// a gapped bottom segment to its child, reproduces the other iterator once
// orientation is normalised.  The second pass visits the blocks in
// decreasing order.
void GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");

  const hal_size_t gapThreshold = 9999999;
  const hal_size_t numSegments = child->getNumTopSegments();

  GappedTopSegmentIteratorConstPtr gtsIt =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr gbsIt =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr gtsItRev =
     child->getGappedTopSegmentIterator(0, gapThreshold);
  gtsItRev->toReverse();
  GappedBottomSegmentIteratorConstPtr gbsItRev =
     parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  gbsItRev->toReverse();

  // Increasing order, one 20-segment block per iteration.
  for (size_t i = 0; i < numSegments; i += 20)
  {
    const size_t blockEnd = i + 19;

    // Forward iterators: left end is the block start, right end its end.
    TopSegmentIteratorConstPtr topEnd = gtsIt->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEnd->getTopSegment()->getArrayIndex()) == i);
    topEnd = gtsIt->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEnd->getTopSegment()->getArrayIndex()) == blockEnd);

    BottomSegmentIteratorConstPtr botEnd = gbsIt->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEnd->getBottomSegment()->getArrayIndex()) == i);
    botEnd = gbsIt->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEnd->getBottomSegment()->getArrayIndex()) == blockEnd);

    // Child -> parent mapping must land on the current gapped bottom.
    GappedBottomSegmentIteratorConstPtr mappedParent = gbsIt->copy();
    mappedParent->toParent(gtsIt);
    if (mappedParent->getReversed())
    {
      mappedParent->toReverse();
    }
    CuAssertTrue(_testCase, mappedParent->equals(gbsIt));

    // Parent -> child mapping must land on the current gapped top.
    GappedTopSegmentIteratorConstPtr mappedChild = gtsIt->copy();
    mappedChild->toChild(gbsIt);
    if (mappedChild->getReversed())
    {
      mappedChild->toReverse();
    }
    CuAssertTrue(_testCase, mappedChild->equals(gtsIt));

    gtsIt->toRight();
    gbsIt->toRight();

    // Reversed iterators: the ends are swapped.
    TopSegmentIteratorConstPtr topEndRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEndRev->getTopSegment()->getArrayIndex()) == blockEnd);
    topEndRev = gtsItRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEndRev->getTopSegment()->getArrayIndex()) == i);
    gtsItRev->toLeft();

    BottomSegmentIteratorConstPtr botEndRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEndRev->getBottomSegment()->getArrayIndex()) ==
                 blockEnd);
    botEndRev = gbsItRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEndRev->getBottomSegment()->getArrayIndex()) == i);
    gbsItRev->toLeft();
  }

  // Restart every iterator on the first segment of the final block.
  const hal_index_t finalBlockStart = numSegments - 20;
  gtsIt = child->getGappedTopSegmentIterator(finalBlockStart, gapThreshold);
  gbsIt = parent->getGappedBottomSegmentIterator(finalBlockStart, 0,
                                                 gapThreshold);
  gtsItRev = child->getGappedTopSegmentIterator(finalBlockStart,
                                                gapThreshold);
  gtsItRev->toReverse();
  gbsItRev = parent->getGappedBottomSegmentIterator(finalBlockStart, 0,
                                                    gapThreshold);
  gbsItRev->toReverse();

  // Decreasing order; i is the highest index of the current block.
  for (hal_index_t i = numSegments - 1; i >= 0; i -= 20)
  {
    const hal_index_t blockStart = i - 19;

    TopSegmentIteratorConstPtr topEnd = gtsIt->getLeft();
    CuAssertTrue(_testCase,
                 topEnd->getTopSegment()->getArrayIndex() == blockStart);
    topEnd = gtsIt->getRight();
    CuAssertTrue(_testCase, topEnd->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !gtsIt->getReversed());
    gtsIt->toLeft();

    BottomSegmentIteratorConstPtr botEnd = gbsIt->getLeft();
    CuAssertTrue(_testCase,
                 botEnd->getBottomSegment()->getArrayIndex() == blockStart);
    botEnd = gbsIt->getRight();
    CuAssertTrue(_testCase,
                 botEnd->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, !gbsIt->getReversed());
    gbsIt->toLeft();

    TopSegmentIteratorConstPtr topEndRev = gtsItRev->getLeft();
    CuAssertTrue(_testCase,
                 topEndRev->getTopSegment()->getArrayIndex() == i);
    topEndRev = gtsItRev->getRight();
    CuAssertTrue(_testCase,
                 topEndRev->getTopSegment()->getArrayIndex() == blockStart);
    CuAssertTrue(_testCase, gtsItRev->getReversed());
    gtsItRev->toRight();

    BottomSegmentIteratorConstPtr botEndRev = gbsItRev->getLeft();
    CuAssertTrue(_testCase,
                 botEndRev->getBottomSegment()->getArrayIndex() == i);
    botEndRev = gbsItRev->getRight();
    CuAssertTrue(_testCase,
                 botEndRev->getBottomSegment()->getArrayIndex() ==
                 blockStart);
    CuAssertTrue(_testCase, gbsItRev->getReversed());
    gbsItRev->toRight();
  }
}