/**
 * Maps the given top segment onto its sibling genome and checks the result.
 *
 * The sibling is "child2" when the segment's genome is named "child1", and
 * "child1" otherwise.  Exactly one mapped segment is expected; its source must
 * mirror @p top and its target must mirror the segment obtained by walking
 * top -> parent bottom segment -> sibling top segment.
 *
 * @param alignment alignment used to open the sibling genome
 * @param top       top segment iterator that is mapped across
 */
void MappedSegmentMapAcrossTest::testTopSegment(AlignmentConstPtr alignment,
                                                TopSegmentIteratorConstPtr top)
{
  const Genome* ancestor = top->getGenome()->getParent();

  // Pick whichever of the two children the source segment does not live in.
  const Genome* sibling = NULL;
  if (top->getGenome()->getName() == "child1")
  {
    sibling = alignment->openGenome("child2");
  }
  else
  {
    sibling = alignment->openGenome("child1");
  }

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, sibling, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // The source half of the mapping must describe the segment we started from.
  CuAssertTrue(_testCase, hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  // Build the expected target by hand: up to the parent, down to the sibling.
  BottomSegmentIteratorConstPtr viaParent = ancestor->getBottomSegmentIterator();
  viaParent->toParent(top);
  TopSegmentIteratorConstPtr expected = sibling->getTopSegmentIterator();
  expected->toChildG(viaParent, sibling);

  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
/**
 * Maps the given top segment up to the ancestor genome named @p ancName and
 * checks the single resulting mapped segment.
 *
 * The expected target is computed by moving to the parent bottom segment of
 * @p top; if that bottom segment is not yet in the requested ancestor, one
 * further parse-up / to-parent hop is taken.
 *
 * @param alignment alignment used to open the ancestor genome
 * @param top       top segment iterator that is mapped up
 * @param ancName   name of the ancestor genome to map onto
 */
void MappedSegmentMapUpTest::testTopSegment(AlignmentConstPtr alignment,
                                            TopSegmentIteratorConstPtr top,
                                            const string& ancName)
{
  const Genome* ancestor = alignment->openGenome(ancName);

  set<MappedSegmentConstPtr> mapped;
  top->getMappedSegments(mapped, ancestor, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side must be the segment we started from.
  CuAssertTrue(_testCase, hit->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, hit->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == top->getReversed());

  BottomSegmentIteratorConstPtr expected = ancestor->getBottomSegmentIterator();
  expected->toParent(top);
  const bool needsSecondHop = (expected->getGenome() != ancestor);
  if (needsSecondHop)
  {
    // extra hop for when top is in grand child
    TopSegmentIteratorConstPtr middle =
      expected->getGenome()->getTopSegmentIterator();
    middle->toParseUp(expected);
    expected->toParent(middle);
  }

  // Target side must match the hand-computed ancestor segment.
  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
/**
 * Copies the top segments of this genome into the top segment array of
 * @p dest.
 *
 * For every segment the coordinates, parent index, parent-reversed flag,
 * bottom parse index and next-paralogy index are copied.  When a segment has a
 * parent, the parent index is then re-expressed relative to the sequence of the
 * same name in the destination's parent genome, so that the offset within the
 * parent sequence's bottom segment array is preserved.
 *
 * Nothing is done when @p dest has no top segments.
 *
 * @param dest genome receiving the segments; it must have either zero top
 *             segments or exactly as many as this genome (asserted)
 * @throws hal_exception if the destination's parent genome has no sequence
 *         with the name of the source parent sequence
 */
void Genome::copyTopSegments(Genome *dest) const
{
  const Genome *inParent = getParent();
  const Genome *outParent = dest->getParent();

  TopSegmentIteratorConstPtr inTop = getTopSegmentIterator();
  TopSegmentIteratorPtr outTop = dest->getTopSegmentIterator();
  hal_size_t n = dest->getNumTopSegments();
  assert(n == 0 || n == getNumTopSegments());

  if (n == 0)
  {
    // Nothing to do if there are no top segments.
    return;
  }

  // Both parents are dereferenced below.
  assert(inParent != NULL && outParent != NULL);
  BottomSegmentIteratorConstPtr inParentBottomSegIt =
    inParent->getBottomSegmentIterator();

  for (; (hal_size_t)inTop->getArrayIndex() < n;
       inTop->toRight(), outTop->toRight())
  {
    assert(inTop->getStartPosition() != NULL_INDEX);

    // NOTE: an earlier check that the source and destination sequences at
    // this site carry the same name was disabled; the per-segment sequence
    // lookups that only fed that check have been dropped along with it.
    outTop->setCoordinates(inTop->getStartPosition(), inTop->getLength());
    outTop->setParentIndex(inTop->getParentIndex());
    outTop->setParentReversed(inTop->getParentReversed());
    outTop->setBottomParseIndex(inTop->getBottomParseIndex());
    outTop->setNextParalogyIndex(inTop->getNextParalogyIndex());

    // Check that the sequences from the bottom segments we point to are the
    // same. If not, correct the indices so that they are.
    if (inTop->getParentIndex() != NULL_INDEX)
    {
      inParentBottomSegIt->toParent(inTop);
      const Sequence *inParentSequence = inParentBottomSegIt->getSequence();
      const Sequence *outParentSequence =
        outParent->getSequence(inParentSequence->getName());
      if (outParentSequence == NULL)
      {
        // Presumably getSequence() signals "not found" with NULL; fail with a
        // clear message instead of dereferencing it.
        throw hal_exception("When copying top segments from " + getName() +
                            " to " + dest->getName() + ": sequence " +
                            inParentSequence->getName() +
                            " not found in destination parent genome " +
                            outParent->getName());
      }
      hal_index_t inParentSegmentOffset =
        inTop->getParentIndex() - inParentSequence->getBottomSegmentArrayIndex();
      hal_index_t outParentSegmentIndex =
        inParentSegmentOffset + outParentSequence->getBottomSegmentArrayIndex();
      outTop->setParentIndex(outParentSegmentIndex);
    }
  }
}
/**
 * Accumulates gap-insertion and substitution statistics for one gapped top
 * segment.
 *
 * If the gapped segment reports any gaps, its gap base count and gap count are
 * added to stats._gapInsertionLength.  Then every top segment between the
 * gapped iterator's left and right ends (by array index, inclusive) that has a
 * parent is compared base-by-base with its parent bottom segment, and each
 * column is counted as a transition, transversion, other substitution, or
 * match (the latter only when neither base is missing data).
 *
 * @param gappedTop gapped top segment iterator; must not be reversed (asserted)
 * @param stats     statistics object that is updated in place
 */
void SummarizeMutations::subsAndGapInserts(
  GappedTopSegmentIteratorConstPtr gappedTop, MutationsStats& stats)
{
  assert(gappedTop->getReversed() == false);

  const hal_size_t gapCount = gappedTop->getNumGaps();
  if (gapCount > 0)
  {
    stats._gapInsertionLength.add(gappedTop->getNumGapBases(), gapCount);
  }

  string parentBases;
  string childBases;
  TopSegmentIteratorConstPtr first = gappedTop->getLeft();
  TopSegmentIteratorConstPtr last = gappedTop->getRight();
  BottomSegmentIteratorConstPtr parentIt =
    first->getTopSegment()->getGenome()->getParent()->getBottomSegmentIterator();

  TopSegmentIteratorConstPtr cur = first->copy();
  while (cur->getTopSegment()->getArrayIndex() <=
         last->getTopSegment()->getArrayIndex())
  {
    if (cur->hasParent())
    {
      parentIt->toParent(cur);
      cur->getString(childBases);
      parentIt->getString(parentBases);
      assert(childBases.length() == parentBases.length());

      const size_t columns = childBases.length();
      for (size_t col = 0; col < columns; ++col)
      {
        const char c = childBases[col];
        const char p = parentBases[col];
        if (isTransition(c, p))
        {
          ++stats._transitions;
          ++stats._subs;
        }
        else if (isTransversion(c, p))
        {
          ++stats._transversions;
          ++stats._subs;
        }
        else if (isSubstitution(c, p))
        {
          ++stats._subs;
        }
        else if (!isMissingData(c) && !isMissingData(p))
        {
          ++stats._matches;
        }
      }
    }
    cur->toRight();
  }
}
/**
 * Validates the alignment, then runs testBottomSegment() for child indices 0
 * and 1 on the first bottom segment of genome "parent" in three states:
 * untouched, after slice(1, 2), and after an additional toReverse().
 *
 * @param alignment alignment under test
 */
void MappedSegmentMapDownTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);
  const Genome* parentGenome = alignment->openGenome("parent");
  BottomSegmentIteratorConstPtr seg = parentGenome->getBottomSegmentIterator();

  for (int phase = 0; phase < 3; ++phase)
  {
    // Phase 0: segment as-is; phase 1: sliced; phase 2: sliced and reversed.
    if (phase == 1)
    {
      seg->slice(1, 2);
    }
    else if (phase == 2)
    {
      seg->toReverse();
    }
    for (hal_size_t childIndex = 0; childIndex < 2; ++childIndex)
    {
      testBottomSegment(alignment, seg, childIndex);
    }
  }
}
/**
 * Builds a gapped bottom segment iterator starting at @p left.
 *
 * All internal bottom iterators start as copies of @p left, and all internal
 * top iterators start as copies of the child genome's top segment iterator;
 * extendRight() is then called.
 *
 * @param left         starting bottom segment; must have zero start and end
 *                     offsets
 * @param childIndex   index of the child genome the iterator is relative to
 * @param gapThreshold stored gap threshold; must be 0 when @p atomic is set
 *                     (asserted)
 * @param atomic       stored atomic flag
 * @throws hal_exception if @p left carries an offset, or if the genome of
 *         @p left has no child at @p childIndex
 */
DefaultGappedBottomSegmentIterator::DefaultGappedBottomSegmentIterator(
  BottomSegmentIteratorConstPtr left,
  hal_size_t childIndex,
  hal_size_t gapThreshold,
  bool atomic) :
  _childIndex(childIndex),
  _gapThreshold(gapThreshold),
  _atomic(atomic)
{
  const bool hasOffset =
    left->getStartOffset() != 0 || left->getEndOffset() != 0;
  if (hasOffset)
  {
    throw hal_exception("offset not currently supported in gapped iterators");
  }

  const Genome* childGenome =
    left->getBottomSegment()->getGenome()->getChild(_childIndex);
  if (!childGenome)
  {
    throw hal_exception("can't init GappedBottomIterator with no child genome");
  }
  assert(!_atomic || _gapThreshold == 0);

  // Parent-side working iterators.
  _left = left->copy();
  _right = left->copy();
  _temp = left->copy();
  _temp2 = left->copy();

  // Child-side working iterators.
  _leftChild = childGenome->getTopSegmentIterator();
  _rightChild = _leftChild->copy();
  _leftDup = _leftChild->copy();
  _rightDup = _leftChild->copy();

  extendRight();
}
/**
 * Maps a bottom segment down to one of its genome's children and checks the
 * single resulting mapped segment.
 *
 * The source of the mapped segment must mirror @p bottom, and its target must
 * mirror the top segment reached with toChild(bottom, childIndex).
 *
 * @param alignment  alignment under test (not used directly here)
 * @param bottom     bottom segment iterator that is mapped down
 * @param childIndex index of the child genome to map onto
 */
void MappedSegmentMapDownTest::testBottomSegment(
  AlignmentConstPtr alignment,
  BottomSegmentIteratorConstPtr bottom,
  hal_size_t childIndex)
{
  const Genome* target = bottom->getGenome()->getChild(childIndex);

  set<MappedSegmentConstPtr> mapped;
  bottom->getMappedSegments(mapped, target, NULL, false);
  CuAssertTrue(_testCase, mapped.size() == 1);
  MappedSegmentConstPtr hit = *mapped.begin();

  // Source side: identical to the bottom segment we started from.
  CuAssertTrue(_testCase,
               hit->getSource()->getGenome() == bottom->getGenome());
  CuAssertTrue(_testCase,
               hit->getSource()->getStartPosition() ==
               bottom->getStartPosition());
  CuAssertTrue(_testCase,
               hit->getSource()->getLength() == bottom->getLength());
  CuAssertTrue(_testCase,
               hit->getSource()->getReversed() == bottom->getReversed());

  // Target side: identical to the directly computed child segment.
  TopSegmentIteratorConstPtr expected = target->getTopSegmentIterator();
  expected->toChild(bottom, childIndex);
  CuAssertTrue(_testCase, hit->getGenome() == expected->getGenome());
  CuAssertTrue(_testCase,
               hit->getStartPosition() == expected->getStartPosition());
  CuAssertTrue(_testCase, hit->getLength() == expected->getLength());
  CuAssertTrue(_testCase, hit->getReversed() == expected->getReversed());
}
/**
 * Advances @p bs with toRight() while it has no child at _childIndex and its
 * length does not exceed _gapThreshold.
 *
 * The walk also stops, without moving, when the iterator sits on the last
 * segment (not reversed) or on the first segment (reversed).
 *
 * @param bs bottom segment iterator that is moved in place
 */
void DefaultGappedBottomSegmentIterator::toRightNextUngapped(
  BottomSegmentIteratorConstPtr bs) const
{
  for (;;)
  {
    if (bs->hasChild(_childIndex))
    {
      return;
    }
    if (bs->getLength() > _gapThreshold)
    {
      return;
    }
    // Do not step past the end of the array in the direction of travel.
    const bool atBoundary = bs->getReversed()
      ? bs->getBottomSegment()->isFirst()
      : bs->getBottomSegment()->isLast();
    if (atBoundary)
    {
      return;
    }
    bs->toRight();
  }
}
//////////////////////////////////////////////////////////////////////////////
// INTERNAL METHODS
//////////////////////////////////////////////////////////////////////////////

/**
 * Decides whether two bottom segments may belong to the same gapped segment,
 * judged by the child top segments they align to (child _childIndex).
 *
 * Both inputs must have a child at _childIndex and must not be equal
 * (asserted).  The method returns false as soon as one of these holds:
 *  - the two child segments differ in their parent-reversed flag;
 *  - exactly one of them has a next paralogy;
 *  - the left child is not leftOf() (not reversed) / rightOf() (reversed) the
 *    right child's start position;
 *  - the bottom segments, or the child segments, lie on different sequences;
 *  - walking the left child with toRight(), the first segment that has a
 *    parent or is longer than _gapThreshold is not the right child.
 * If the left child has a next paralogy, the position, sequence and walk
 * checks are repeated for the next-paralogy segments of both children.
 *
 * Uses the member iterators _leftChild, _rightChild, _leftDup and _rightDup as
 * scratch state; they are repositioned by this call.
 *
 * @param left  left bottom segment
 * @param right right bottom segment
 * @return true when none of the checks above fails
 */
bool DefaultGappedBottomSegmentIterator::compatible(
  BottomSegmentIteratorConstPtr left,
  BottomSegmentIteratorConstPtr right) const
{
  assert(left->hasChild(_childIndex) && right->hasChild(_childIndex));
  assert(left->equals(right) == false);

  // Position the scratch iterators on the aligned child segments.
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);

  if (_leftChild->getTopSegment()->getParentReversed() !=
      _rightChild->getTopSegment()->getParentReversed())
  {
    return false;
  }

  if (_leftChild->hasNextParalogy() != _rightChild->hasNextParalogy())
  {
    return false;
  }

  // Relative position check; the test used depends on the child's orientation.
  if ((!_leftChild->getReversed() &&
       _leftChild->leftOf(_rightChild->getStartPosition()) == false) ||
      (_leftChild->getReversed() &&
       _leftChild->rightOf(_rightChild->getStartPosition()) == false))
  {
    return false;
  }

  if (left->getBottomSegment()->getSequence() !=
      right->getBottomSegment()->getSequence() ||
      _leftChild->getTopSegment()->getSequence() !=
      _rightChild->getTopSegment()->getSequence())
  {
    return false;
  }

  // Walk right from the left child, skipping segments that have no parent and
  // are no longer than the gap threshold; the first other segment met must be
  // the right child.
  while (true)
  {
    assert(_leftChild->isLast() == false);
    _leftChild->toRight();
    if (_leftChild->hasParent() == true ||
        _leftChild->getLength() > _gapThreshold)
    {
      if (_leftChild->equals(_rightChild))
      {
        break;
      }
      else
      {
        return false;
      }
    }
  }

  // The walk moved _leftChild; put both scratch iterators back.
  _leftChild->toChild(left, _childIndex);
  _rightChild->toChild(right, _childIndex);

  if (_leftChild->hasNextParalogy() == true)
  {
    _leftDup->copy(_leftChild);
    _leftDup->toNextParalogy();
    _rightDup->copy(_rightChild);
    _rightDup->toNextParalogy();

    // NOTE(review): the reversed case here tests _rightDup->leftOf(left start),
    // whereas the check above tests _leftChild->rightOf(right start) — confirm
    // the difference is intentional.
    if ((_leftDup->getReversed() == false &&
         _leftDup->leftOf(_rightDup->getStartPosition()) == false) ||
        (_leftDup->getReversed() == true &&
         _rightDup->leftOf(_leftDup->getStartPosition()) == false))
    {
      return false;
    }
    if (_leftDup->getTopSegment()->getSequence() !=
        _rightDup->getTopSegment()->getSequence())
    {
      return false;
    }

    // Same walk as above, on the paralogous segments.
    while (true)
    {
      assert(_leftDup->isLast() == false);
      _leftDup->toRight();
      if (_leftDup->hasParent() == true ||
          _leftDup->getLength() > _gapThreshold)
      {
        if (_leftDup->equals(_rightDup))
        {
          break;
        }
        else
        {
          return false;
        }
      }
    }
  }

  return true;
}
void hal::validateSequence(const Sequence* sequence) { // Verify that the DNA sequence doesn't contain funny characters DNAIteratorConstPtr dnaIt = sequence->getDNAIterator(); hal_size_t length = sequence->getSequenceLength(); for (hal_size_t i = 0; i < length; ++i) { char c = dnaIt->getChar(); if (isNucleotide(c) == false) { stringstream ss; ss << "Non-nucleotide character discoverd at position " << i << " of sequence " << sequence->getName() << ": " << c; throw hal_exception(ss.str()); } } // Check the top segments if (sequence->getGenome()->getParent() != NULL) { hal_size_t totalTopLength = 0; TopSegmentIteratorConstPtr topIt = sequence->getTopSegmentIterator(); hal_size_t numTopSegments = sequence->getNumTopSegments(); for (hal_size_t i = 0; i < numTopSegments; ++i) { const TopSegment* topSegment = topIt->getTopSegment(); validateTopSegment(topSegment); totalTopLength += topSegment->getLength(); topIt->toRight(); } if (totalTopLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its top segments add up to " << totalTopLength; throw hal_exception(ss.str()); } } // Check the bottom segments if (sequence->getGenome()->getNumChildren() > 0) { hal_size_t totalBottomLength = 0; BottomSegmentIteratorConstPtr bottomIt = sequence->getBottomSegmentIterator(); hal_size_t numBottomSegments = sequence->getNumBottomSegments(); for (hal_size_t i = 0; i < numBottomSegments; ++i) { const BottomSegment* bottomSegment = bottomIt->getBottomSegment(); validateBottomSegment(bottomSegment); totalBottomLength += bottomSegment->getLength(); bottomIt->toRight(); } if (totalBottomLength != length) { stringstream ss; ss << "Sequence " << sequence->getName() << " has length " << length << " but its bottom segments add up to " << totalBottomLength; throw hal_exception(ss.str()); } } }
void hal::validateTopSegment(const TopSegment* topSegment) { const Genome* genome = topSegment->getGenome(); hal_index_t index = topSegment->getArrayIndex(); if (index < 0 || index >= (hal_index_t)genome->getSequenceLength()) { stringstream ss; ss << "Segment out of range " << index << " in genome " << genome->getName(); throw hal_exception(ss.str()); } if (topSegment->getLength() < 1) { stringstream ss; ss << "Top segment " << index << " in genome " << genome->getName() << " has length 0 which is not currently supported"; throw hal_exception(ss.str()); } const Genome* parentGenome = genome->getParent(); const hal_index_t parentIndex = topSegment->getParentIndex(); if (parentGenome != NULL && parentIndex != NULL_INDEX) { if (parentIndex >= (hal_index_t)parentGenome->getNumBottomSegments()) { stringstream ss; ss << "Parent index " << parentIndex << " of segment " << topSegment->getArrayIndex() << " out of range in genome " << parentGenome->getName(); throw hal_exception(ss.str()); } BottomSegmentIteratorConstPtr bottomSegmentIterator = parentGenome->getBottomSegmentIterator(parentIndex); const BottomSegment* parentSegment = bottomSegmentIterator->getBottomSegment(); if (topSegment->getLength() != parentSegment->getLength()) { stringstream ss; ss << "Parent length of segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has length " << parentSegment->getLength() << " which does not match " << topSegment->getLength(); throw hal_exception(ss.str()); } } const hal_index_t parseIndex = topSegment->getBottomParseIndex(); if (parseIndex == NULL_INDEX) { if (genome->getNumChildren() != 0) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has null parse index"; throw hal_exception(ss.str()); } } else { if (parseIndex >= (hal_index_t)genome->getNumBottomSegments()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has 
parse index out of range"; throw hal_exception(ss.str()); } hal_offset_t parseOffset = topSegment->getBottomParseOffset(); BottomSegmentIteratorConstPtr bottomSegmentIterator = genome->getBottomSegmentIterator(parseIndex); const BottomSegment* parseSegment = bottomSegmentIterator->getBottomSegment(); if (parseOffset >= parseSegment->getLength()) { stringstream ss; ss << "Top Segment " << topSegment->getArrayIndex() << " in genome " << genome->getName() << " has parse offset out of range"; throw hal_exception(ss.str()); } if ((hal_index_t)parseOffset + parseSegment->getStartPosition() != topSegment->getStartPosition()) { throw hal_exception("parse index broken in top segment in genome " + genome->getName()); } } const hal_index_t paralogyIndex = topSegment->getNextParalogyIndex(); if (paralogyIndex != NULL_INDEX) { TopSegmentIteratorConstPtr pti = genome->getTopSegmentIterator(paralogyIndex); if (pti->getTopSegment()->getParentIndex() != topSegment->getParentIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has parent index " << topSegment->getParentIndex() << ", but next paraglog " << topSegment->getNextParalogyIndex() << " has parent Index " << pti->getTopSegment()->getParentIndex() << ". Paralogous top segments must share same parent."; throw hal_exception(ss.str()); } if (paralogyIndex == topSegment->getArrayIndex()) { stringstream ss; ss << "Top segment " << topSegment->getArrayIndex() << " has paralogy index " << topSegment->getNextParalogyIndex() << " which isn't allowed"; throw hal_exception(ss.str()); } } }
/**
 * Walks gapped top and bottom iterators over genomes "child" and "parent" and
 * checks that every gapped segment consists of exactly one segment.
 *
 * First pass: forward iterators start at index 0 and move right, reversed
 * iterators start at index 0 and move left; each is expected to sit on array
 * index i at step i.  Second pass: all four iterators start on the last index
 * and are walked in the opposite direction, additionally checking their
 * reversed flags.
 *
 * @param alignment alignment under test
 */
void GappedSegmentSimpleIteratorTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");
  const int gapThreshold = 9999999;
  const hal_size_t numTop = child->getNumTopSegments();

  GappedTopSegmentIteratorConstPtr topFwd =
    child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr botFwd =
    parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev =
    child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev =
    parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  botRev->toReverse();

  // Pass 1: ascending array index.
  for (size_t i = 0; i < numTop; ++i)
  {
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase, topSeg->equals(topFwd->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(topSeg->getTopSegment()->getArrayIndex())
                 == i);
    topFwd->toRight();

    BottomSegmentIteratorConstPtr botSeg = botFwd->getLeft();
    CuAssertTrue(_testCase, botSeg->equals(botFwd->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botSeg->getBottomSegment()->getArrayIndex()) == i);
    botFwd->toRight();

    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase, topSegRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topSegRev->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botSegRev = botRev->getLeft();
    CuAssertTrue(_testCase, botSegRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botSegRev->getBottomSegment()->getArrayIndex()) == i);
    botRev->toLeft();
  }

  // Re-seat all four iterators on the last segment.
  topFwd = child->getGappedTopSegmentIterator(numTop - 1, gapThreshold);
  botFwd = parent->getGappedBottomSegmentIterator(numTop - 1, 0, gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numTop - 1, gapThreshold);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(numTop - 1, 0, gapThreshold);
  botRev->toReverse();

  // Pass 2: descending array index.
  for (hal_index_t i = numTop - 1; i >= 0; --i)
  {
    TopSegmentIteratorConstPtr topSeg = topFwd->getLeft();
    CuAssertTrue(_testCase, topSeg->equals(topFwd->getRight()));
    CuAssertTrue(_testCase, topSeg->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botSeg = botFwd->getLeft();
    CuAssertTrue(_testCase, botSeg->equals(botFwd->getRight()));
    CuAssertTrue(_testCase, botSeg->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botFwd->getReversed() == false);
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topSegRev = topRev->getLeft();
    CuAssertTrue(_testCase, topSegRev->equals(topRev->getRight()));
    CuAssertTrue(_testCase, topSegRev->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr botSegRev = botRev->getLeft();
    CuAssertTrue(_testCase, botSegRev->equals(botRev->getRight()));
    CuAssertTrue(_testCase,
                 botSegRev->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botRev->getReversed() == true);
    botRev->toRight();
  }
}
/**
 * Walks gapped top and bottom iterators over genomes "child" and "parent" and
 * checks that every gapped segment spans 20 consecutive segments.
 *
 * First pass (ascending, step 20): forward iterators must span [i, i + 19],
 * reversed iterators must span [i + 19, i] from left to right; in addition,
 * toParent()/toChild() of a copy must land back on the opposite gapped
 * iterator once any reversal is undone.  Second pass (descending, step 20):
 * the iterators are re-seated at numTopSegments - 20 and walked the other
 * way, also checking their reversed flags.
 *
 * @param alignment alignment under test
 */
void GappedSegmentIteratorIndelTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* child = alignment->openGenome("child");
  const Genome* parent = alignment->openGenome("parent");
  const int gapThreshold = 9999999;
  const hal_size_t numTop = child->getNumTopSegments();

  GappedTopSegmentIteratorConstPtr topFwd =
    child->getGappedTopSegmentIterator(0, gapThreshold);
  GappedBottomSegmentIteratorConstPtr botFwd =
    parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  GappedTopSegmentIteratorConstPtr topRev =
    child->getGappedTopSegmentIterator(0, gapThreshold);
  topRev->toReverse();
  GappedBottomSegmentIteratorConstPtr botRev =
    parent->getGappedBottomSegmentIterator(0, 0, gapThreshold);
  botRev->toReverse();

  // Pass 1: ascending, one 20-segment block per step.
  for (size_t i = 0; i < numTop; i += 20)
  {
    TopSegmentIteratorConstPtr topEnd = topFwd->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(topEnd->getTopSegment()->getArrayIndex())
                 == i);
    topEnd = topFwd->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(topEnd->getTopSegment()->getArrayIndex())
                 == i + 19);

    BottomSegmentIteratorConstPtr botEnd = botFwd->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEnd->getBottomSegment()->getArrayIndex()) == i);
    botEnd = botFwd->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEnd->getBottomSegment()->getArrayIndex()) == i + 19);

    // Child -> parent must reproduce the forward bottom iterator.
    GappedBottomSegmentIteratorConstPtr upFromChild = botFwd->copy();
    upFromChild->toParent(topFwd);
    if (upFromChild->getReversed())
    {
      upFromChild->toReverse();
    }
    CuAssertTrue(_testCase, upFromChild->equals(botFwd));

    // Parent -> child must reproduce the forward top iterator.
    GappedTopSegmentIteratorConstPtr downFromParent = topFwd->copy();
    downFromParent->toChild(botFwd);
    if (downFromParent->getReversed())
    {
      downFromParent->toReverse();
    }
    CuAssertTrue(_testCase, downFromParent->equals(topFwd));

    topFwd->toRight();
    botFwd->toRight();

    TopSegmentIteratorConstPtr topEndRev = topRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEndRev->getTopSegment()->getArrayIndex()) == i + 19);
    topEndRev = topRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   topEndRev->getTopSegment()->getArrayIndex()) == i);
    topRev->toLeft();

    BottomSegmentIteratorConstPtr botEndRev = botRev->getLeft();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEndRev->getBottomSegment()->getArrayIndex()) == i + 19);
    botEndRev = botRev->getRight();
    CuAssertTrue(_testCase,
                 static_cast<size_t>(
                   botEndRev->getBottomSegment()->getArrayIndex()) == i);
    botRev->toLeft();
  }

  // Re-seat all four iterators at index numTop - 20.
  topFwd = child->getGappedTopSegmentIterator(numTop - 20, gapThreshold);
  botFwd = parent->getGappedBottomSegmentIterator(numTop - 20, 0, gapThreshold);
  topRev = child->getGappedTopSegmentIterator(numTop - 20, gapThreshold);
  topRev->toReverse();
  botRev = parent->getGappedBottomSegmentIterator(numTop - 20, 0, gapThreshold);
  botRev->toReverse();

  // Pass 2: descending, i is the highest index of the current block.
  for (hal_index_t i = numTop - 1; i >= 0; i -= 20)
  {
    TopSegmentIteratorConstPtr topEnd = topFwd->getLeft();
    CuAssertTrue(_testCase, topEnd->getTopSegment()->getArrayIndex() == i - 19);
    topEnd = topFwd->getRight();
    CuAssertTrue(_testCase, topEnd->getTopSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, topFwd->getReversed() == false);
    topFwd->toLeft();

    BottomSegmentIteratorConstPtr botEnd = botFwd->getLeft();
    CuAssertTrue(_testCase,
                 botEnd->getBottomSegment()->getArrayIndex() == i - 19);
    botEnd = botFwd->getRight();
    CuAssertTrue(_testCase, botEnd->getBottomSegment()->getArrayIndex() == i);
    CuAssertTrue(_testCase, botFwd->getReversed() == false);
    botFwd->toLeft();

    TopSegmentIteratorConstPtr topEndRev = topRev->getLeft();
    CuAssertTrue(_testCase, topEndRev->getTopSegment()->getArrayIndex() == i);
    topEndRev = topRev->getRight();
    CuAssertTrue(_testCase,
                 topEndRev->getTopSegment()->getArrayIndex() == i - 19);
    CuAssertTrue(_testCase, topRev->getReversed() == true);
    topRev->toRight();

    BottomSegmentIteratorConstPtr botEndRev = botRev->getLeft();
    CuAssertTrue(_testCase,
                 botEndRev->getBottomSegment()->getArrayIndex() == i);
    botEndRev = botRev->getRight();
    CuAssertTrue(_testCase,
                 botEndRev->getBottomSegment()->getArrayIndex() == i - 19);
    CuAssertTrue(_testCase, botRev->getReversed() == true);
    botRev->toRight();
  }
}
// quickly count subsitutions without loading rearrangement machinery. // used for benchmarks for basic file scanning... and not much else since // the interface is still a bit wonky. void SummarizeMutations::substitutionAnalysis(const Genome* genome, MutationsStats& stats) { assert(stats._subs == 0); if (genome->getNumChildren() == 0 || genome->getNumBottomSegments() == 0 || (_targetSet && _targetSet->find(genome->getName()) == _targetSet->end())) { return; } const Genome* parent = genome->getParent(); string pname = parent != NULL ? parent->getName() : string(); StrPair branchName(genome->getName(), pname); BottomSegmentIteratorConstPtr bottom = genome->getBottomSegmentIterator(); TopSegmentIteratorConstPtr top = genome->getChild(0)->getTopSegmentIterator(); string gString, cString; hal_size_t n = genome->getNumBottomSegments(); vector<hal_size_t> children; hal_size_t m = genome->getNumChildren(); for (hal_size_t i = 0; i < m; ++i) { string cName = genome->getChild(i)->getName(); if (!_targetSet || (_targetSet && _targetSet->find(cName) != _targetSet->end())) { children.push_back(i); } } if (children.empty()) { return; } for (hal_size_t i = 0; i < n; ++i) { bool readString = false; for (size_t j = 0; j < children.size(); ++j) { if (bottom->hasChild(children[j])) { if (readString == false) { bottom->getString(gString); readString = true; } top->toChild(bottom, children[j]); top->getString(cString); assert(gString.length() == cString.length()); for (hal_size_t k = 0; k < gString.length(); ++k) { if (isSubstitution(gString[k], cString[k])) { ++stats._subs; } } } } bottom->toRight(); } }
/**
 * Checks top segment iterator coordinates on genomes "parent1" and "child1"
 * when reached through toChild() from a (sliced) parent bottom segment and
 * through toParseUp() from forward and reversed bottom segments of "child1".
 *
 * The expected start positions, lengths, orientations and DNA strings are the
 * fixed values asserted below.
 *
 * @param alignment alignment under test
 */
void TopSegmentIteratorReverseTest::checkCallBack(AlignmentConstPtr alignment)
{
  const Genome* parent1 = alignment->openGenome("parent1");
  const Genome* child1 = alignment->openGenome("child1");

  TopSegmentIteratorConstPtr plainTop = child1->getTopSegmentIterator();
  BottomSegmentIteratorConstPtr bottom = parent1->getBottomSegmentIterator();
  TopSegmentIteratorConstPtr mappedTop = child1->getTopSegmentIterator();
  mappedTop->toChild(bottom, 0);

  // Untouched iterator versus the one reached from the parent.
  CuAssertTrue(_testCase, plainTop->getStartPosition() == 0);
  CuAssertTrue(_testCase, plainTop->getLength() == 10);
  CuAssertTrue(_testCase, !plainTop->getReversed());

  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 9);
  CuAssertTrue(_testCase, mappedTop->getLength() == 10);
  CuAssertTrue(_testCase, mappedTop->getReversed());

  // Slice the parent segment and map down again.
  bottom->slice(1, 3);
  mappedTop->toChild(bottom, 0);

  CuAssertTrue(_testCase, bottom->getStartPosition() == 1);
  CuAssertTrue(_testCase, bottom->getLength() == 6);
  CuAssertTrue(_testCase, mappedTop->getStartPosition() == 8);
  CuAssertTrue(_testCase, mappedTop->getLength() == 6);

  string dna;
  bottom->getString(dna);
  CuAssertTrue(_testCase, dna == "CCTACG");
  mappedTop->getString(dna);
  CuAssertTrue(_testCase, dna == "CACGTA");

  // Parse-up from the first bottom segment of child1, forward then reversed.
  bottom = child1->getBottomSegmentIterator();
  CuAssertTrue(_testCase, !bottom->getReversed());

  plainTop->toParseUp(bottom);
  CuAssertTrue(_testCase, plainTop->getStartPosition() == 0);
  CuAssertTrue(_testCase, plainTop->getLength() == 5);

  bottom->toReverse();
  plainTop->toParseUp(bottom);
  CuAssertTrue(_testCase, plainTop->getStartPosition() == 4);
  CuAssertTrue(_testCase, plainTop->getLength() == 5);

  // Back to forward, move one segment right, and repeat.
  bottom->toReverse();
  CuAssertTrue(_testCase, !bottom->getReversed());

  bottom->toRight();
  plainTop->toParseUp(bottom);
  CuAssertTrue(_testCase, plainTop->getStartPosition() == 5);
  CuAssertTrue(_testCase, plainTop->getLength() == 5);

  bottom->toReverse();
  plainTop->toParseUp(bottom);
  CuAssertTrue(_testCase, plainTop->getStartPosition() == 9);
  CuAssertTrue(_testCase, plainTop->getLength() == 5);
}
/**
 * Checks toParseUp() on genomes "case1" .. "case4": after parsing up from a
 * bottom segment, and again after slicing that bottom segment, the top
 * iterator must start where the bottom iterator starts.  Case 1 additionally
 * checks that the lengths agree and that slice(3, 1) shortens the bottom
 * iterator by 4.
 *
 * @param alignment alignment under test
 */
void TopSegmentIteratorParseTest::checkCallBack(AlignmentConstPtr alignment)
{
  BottomSegmentIteratorConstPtr bottom;
  TopSegmentIteratorConstPtr top;

  // case 1
  {
    const Genome* genome = alignment->openGenome("case1");
    top = genome->getTopSegmentIterator();
    bottom = genome->getBottomSegmentIterator();
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
    CuAssertTrue(_testCase, bottom->getLength() == top->getLength());

    bottom->slice(3, 1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getLength() ==
                 bottom->getBottomSegment()->getLength() - 4);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
    CuAssertTrue(_testCase, bottom->getLength() == top->getLength());
  }

  // case 2
  {
    const Genome* genome = alignment->openGenome("case2");
    top = genome->getTopSegmentIterator();
    bottom = genome->getBottomSegmentIterator(1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(1, 1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }

  // case 3
  {
    const Genome* genome = alignment->openGenome("case3");
    top = genome->getTopSegmentIterator();
    bottom = genome->getBottomSegmentIterator();
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(2, 1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }

  // case 4
  {
    const Genome* genome = alignment->openGenome("case4");
    top = genome->getTopSegmentIterator();
    bottom = genome->getBottomSegmentIterator(1);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());

    bottom->slice(2, 2);
    top->toParseUp(bottom);
    CuAssertTrue(_testCase,
                 bottom->getStartPosition() == top->getStartPosition());
  }
}
/**
 * Checks segment mapping between "child1" and "child2" (both children of
 * "parent") in both directions.
 *
 * child1 -> child2: the first mapped segment must have the first top segment
 * of child1 as its source, and must match the child2 segment reached via the
 * parent bottom segment.
 *
 * child2 -> child1: exactly three mapped segments are expected; each must have
 * the first top segment of child2 as its source, must lie in the same genome
 * and have the same length as the segment reached via the parent, and between
 * them they must cover array indices 0, 1 and 2.
 *
 * @param alignment alignment under test
 */
void MappedSegmentMapDupeTest::checkCallBack(AlignmentConstPtr alignment)
{
  validateAlignment(alignment);

  const Genome* parent = alignment->openGenome("parent");
  const Genome* child1 = alignment->openGenome("child1");
  const Genome* child2 = alignment->openGenome("child2");

  TopSegmentIteratorConstPtr top = child1->getTopSegmentIterator();
  set<MappedSegmentConstPtr> results;
  top->getMappedSegments(results, child2, NULL, true);
  // The exact-count check for this direction is disabled:
  //  CuAssertTrue(_testCase, results.size() == 3);
  // Still make sure there is something to dereference below.
  CuAssertTrue(_testCase, !results.empty());

  MappedSegmentConstPtr mseg = *results.begin();
  CuAssertTrue(_testCase, mseg->getSource()->getGenome() == top->getGenome());
  CuAssertTrue(_testCase,
               mseg->getSource()->getStartPosition() == top->getStartPosition());
  CuAssertTrue(_testCase, mseg->getSource()->getLength() == top->getLength());
  CuAssertTrue(_testCase,
               mseg->getSource()->getReversed() == top->getReversed());

  BottomSegmentIteratorConstPtr bottom = parent->getBottomSegmentIterator();
  bottom->toParent(top);
  TopSegmentIteratorConstPtr sister = child2->getTopSegmentIterator();
  sister->toChildG(bottom, child2);
  CuAssertTrue(_testCase, mseg->getGenome() == sister->getGenome());
  CuAssertTrue(_testCase,
               mseg->getStartPosition() == sister->getStartPosition());
  CuAssertTrue(_testCase, mseg->getLength() == sister->getLength());
  CuAssertTrue(_testCase, mseg->getReversed() == sister->getReversed());

  // Opposite direction: child2 -> child1.
  top = child2->getTopSegmentIterator();
  results.clear();
  top->getMappedSegments(results, child1, NULL, true);
  CuAssertTrue(_testCase, results.size() == 3);

  const hal_index_t numExpected = 3;
  bool found[3] = {false};
  set<MappedSegmentConstPtr>::iterator i = results.begin();
  for (; i != results.end(); ++i)
  {
    // Distinct names: these used to shadow the outer mseg/bottom/sister.
    MappedSegmentConstPtr dupSeg = *i;
    CuAssertTrue(_testCase,
                 dupSeg->getSource()->getGenome() == top->getGenome());
    CuAssertTrue(_testCase,
                 dupSeg->getSource()->getStartPosition() ==
                 top->getStartPosition());
    CuAssertTrue(_testCase,
                 dupSeg->getSource()->getLength() == top->getLength());
    CuAssertTrue(_testCase,
                 dupSeg->getSource()->getReversed() == top->getReversed());

    BottomSegmentIteratorConstPtr dupBottom = parent->getBottomSegmentIterator();
    dupBottom->toParent(top);
    // NOTE(review): the iterator is created on child2 and then moved with
    // toChildG(..., child1); the genome assertion below relies on toChildG
    // re-seating it in child1 — confirm.
    TopSegmentIteratorConstPtr dupSister = child2->getTopSegmentIterator();
    dupSister->toChildG(dupBottom, child1);
    CuAssertTrue(_testCase, dupSeg->getGenome() == dupSister->getGenome());
    CuAssertTrue(_testCase, dupSeg->getLength() == dupSister->getLength());

    // Guard the write into found[]: an unexpected index must fail the test
    // rather than write out of bounds.
    const hal_index_t dupIndex = dupSeg->getArrayIndex();
    CuAssertTrue(_testCase, dupIndex >= 0 && dupIndex < numExpected);
    found[dupIndex] = true;
  }
  CuAssertTrue(_testCase, found[0] == true);
  CuAssertTrue(_testCase, found[1] == true);
  CuAssertTrue(_testCase, found[2] == true);
}