void TopSegmentIteratorToSiteTest::checkGenome(const Genome *genome) { TopSegmentIteratorPtr ti = genome->getTopSegmentIterator(); for (hal_index_t pos = 0; pos < (hal_index_t)genome->getSequenceLength(); ++pos) { ti->toSite(pos); CuAssertTrue(_testCase, ti->getStartPosition() == pos); CuAssertTrue(_testCase, ti->getLength() == 1); ti->toSite(pos, false); CuAssertTrue(_testCase, pos >= ti->getStartPosition() && pos < ti->getStartPosition() + (hal_index_t)ti->getLength()); CuAssertTrue(_testCase, ti->getLength() == ti->getTopSegment()->getLength()); } }
void TopSegmentIteratorParseTest::checkCallBack(const Alignment *alignment) { BottomSegmentIteratorPtr bi; TopSegmentIteratorPtr ti; // case 1 const Genome *case1 = alignment->openGenome("case1"); ti = case1->getTopSegmentIterator(); bi = case1->getBottomSegmentIterator(); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); CuAssertTrue(_testCase, bi->getLength() == ti->getLength()); bi->slice(3, 1); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getLength() == bi->getBottomSegment()->getLength() - 4); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); CuAssertTrue(_testCase, bi->getLength() == ti->getLength()); // case 2 const Genome *case2 = alignment->openGenome("case2"); ti = case2->getTopSegmentIterator(); bi = case2->getBottomSegmentIterator(1); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); bi->slice(1, 1); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); // case 3 const Genome *case3 = alignment->openGenome("case3"); ti = case3->getTopSegmentIterator(); bi = case3->getBottomSegmentIterator(); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); bi->slice(2, 1); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); // case 4 const Genome *case4 = alignment->openGenome("case4"); ti = case4->getTopSegmentIterator(); bi = case4->getBottomSegmentIterator(1); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); bi->slice(2, 2); ti->toParseUp(bi); CuAssertTrue(_testCase, bi->getStartPosition() == ti->getStartPosition()); }
/**
 * Advance @p topSeqIt with toRight() for as long as it sits on a segment
 * that has no parent and whose length does not exceed _gapThreshold.
 * The walk also stops when the iterator is on the final segment in its
 * direction of travel (last segment if not reversed, first if reversed).
 *
 * @param topSeqIt iterator that is moved in place.
 */
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    for (;;) {
        const bool onGap = !topSeqIt->tseg()->hasParent() && topSeqIt->getLength() <= _gapThreshold;
        if (!onGap) {
            return;
        }

        // Nothing further to the "right" in the iterator's orientation.
        const bool atBoundary = topSeqIt->getReversed() ? topSeqIt->getTopSegment()->isFirst()
                                                        : topSeqIt->getTopSegment()->isLast();
        if (atBoundary) {
            return;
        }

        topSeqIt->toRight();
    }
}
// note: takes smart pointer as it maybe added to the results static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) { const Genome *parent = mappedSeg->getGenome()->getParent(); assert(parent != NULL); hal_size_t added = 0; if (mappedSeg->isTop() == true) { BottomSegmentIteratorPtr botSegIt = parent->getBottomSegmentIterator(); TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop(); if (topSegIt->tseg()->hasParent() == true && topSegIt->getLength() >= minLength && (doDupes == true || topSegIt->tseg()->isCanonicalParalog() == true)) { botSegIt->toParent(topSegIt); mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(botSegIt)); results.push_back(mappedSeg); ++added; } } else { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr botSegIt = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)botSegIt->getStartOffset(); hal_index_t endOffset = (hal_index_t)botSegIt->getEndOffset(); TopSegmentIteratorPtr topSegIt = mappedSeg->getGenome()->getTopSegmentIterator(); topSegIt->toParseUp(botSegIt); do { TopSegmentIteratorPtr newTopSegIt = topSegIt->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr backBotSegIt = botSegIt->clone(); backBotSegIt->toParseDown(newTopSegIt); hal_index_t startBack = (hal_index_t)backBotSegIt->getStartOffset(); hal_index_t endBack = (hal_index_t)backBotSegIt->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta); newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newTopSegIt)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapUp(newMappedSeg, results, doDupes, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (topSegIt->getEndPosition() != rightCutoff) { topSegIt->toRight(rightCutoff); } else { break; } } while (true); } return added; }
void TopSegmentSimpleIteratorTest::checkCallBack(const Alignment *alignment) { const Genome *ancGenome = alignment->openGenome("Anc0"); CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == _topSegments.size()); TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0); for (size_t i = 0; i < ancGenome->getNumTopSegments(); ++i) { CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == i); _topSegments[i].compareTo(tsIt, _testCase); tsIt->toRight(); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); for (hal_index_t i = ancGenome->getNumTopSegments() - 1; i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i); _topSegments[i].compareTo(tsIt, _testCase); tsIt->toLeft(); } tsIt = ancGenome->getTopSegmentIterator(0); tsIt->slice(0, tsIt->getLength() - 1); for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toRight(tsIt->getStartPosition() + 1); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); tsIt->slice(tsIt->getLength() - 1, 0); for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toLeft(tsIt->getStartPosition() - 1); } tsIt = ancGenome->getTopSegmentIterator(0); tsIt->toReverse(); CuAssertTrue(_testCase, tsIt->getReversed() == true); tsIt->slice(tsIt->getLength() - 1, 0); for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toLeft(tsIt->getStartPosition() + 1); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); tsIt->toReverse(); tsIt->slice(0, tsIt->getLength() - 1); for (hal_index_t i = ancGenome->getSequenceLength() - 1; 
i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toRight(tsIt->getStartPosition() - 1); } }
void GenomeCopyTest::checkCallBack(const Alignment *alignment) { // FIXME: halAlignment->open() fails miserably but // openHalAlignmentReadOnly works? Probably some state isn't cleared // on close. AlignmentPtr tmp(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS)); _secondAlignment = tmp; const Genome *ancGenome = alignment->openGenome("AncGenome"); CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome"); CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0); CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000); const MetaData *ancMeta = ancGenome->getMetaData(); CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy"); const Genome *leafGenome = alignment->openGenome("LeafGenome1"); string ancSeq = "CAT"; hal_index_t n = ancGenome->getSequenceLength(); DnaIteratorPtr dnaIt = ancGenome->getDnaIterator(); for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) { size_t i = dnaIt->getArrayIndex() % ancSeq.size(); CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]); } TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator(); n = leafGenome->getNumTopSegments(); for (; topIt->getArrayIndex() < n; topIt->toRight()) { CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex()); CuAssertTrue(_testCase, topIt->getLength() == 1); CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3); CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true); CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5); if (topIt->getArrayIndex() != 6) { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6); } else { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7); } } BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator(); n = ancGenome->getNumBottomSegments(); for (; botIt->getArrayIndex() < n; botIt->toRight()) { CuAssertTrue(_testCase, botIt->getStartPosition() == 
botIt->getArrayIndex()); CuAssertTrue(_testCase, botIt->getLength() == 1); CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3); CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true); CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5); } const Genome *copyRootGenome = _secondAlignment->openGenome("copyRootGenome"); const Genome *copyLeafGenome = _secondAlignment->openGenome("LeafGenome1"); CuAssertTrue(_testCase, copyRootGenome->getName() == "copyRootGenome"); CuAssertTrue(_testCase, copyRootGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, copyRootGenome->getNumTopSegments() == 0); CuAssertTrue(_testCase, copyRootGenome->getNumBottomSegments() == 700000); CuAssertTrue(_testCase, copyLeafGenome->getName() == "LeafGenome1"); CuAssertTrue(_testCase, copyLeafGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, copyLeafGenome->getNumTopSegments() == 5000); CuAssertTrue(_testCase, copyLeafGenome->getNumBottomSegments() == 0); const MetaData *copyMeta = copyRootGenome->getMetaData(); CuAssertTrue(_testCase, copyMeta->get("Young") == "Jeezy"); n = copyRootGenome->getSequenceLength(); dnaIt = copyRootGenome->getDnaIterator(); for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) { size_t i = dnaIt->getArrayIndex() % ancSeq.size(); CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]); } topIt = copyLeafGenome->getTopSegmentIterator(); n = copyLeafGenome->getNumTopSegments(); for (; topIt->getArrayIndex() < n; topIt->toRight()) { CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex()); CuAssertTrue(_testCase, topIt->getLength() == 1); CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3); CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true); CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5); if (topIt->getArrayIndex() != 6) { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6); } else { CuAssertTrue(_testCase, 
topIt->tseg()->getNextParalogyIndex() == 7); } } botIt = copyRootGenome->getBottomSegmentIterator(); n = copyRootGenome->getNumBottomSegments(); for (; botIt->getArrayIndex() < n; botIt->toRight()) { CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex()); CuAssertTrue(_testCase, botIt->getLength() == 1); CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3); CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true); CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5); } _secondAlignment->close(); remove(_path.c_str()); }
// note: takes smart pointer as it maybe added to the results static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) { hal_size_t added = 0; if (mappedSeg->isTop() == true) { SegmentIteratorPtr target = mappedSeg->getTargetIteratorPtr(); SegmentIteratorPtr source = mappedSeg->getSourceIteratorPtr(); TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(target); TopSegmentIteratorPtr topCopy = top->clone(); do { // FIXME: why isn't clone() polymorphic? SegmentIteratorPtr newSource; if (source->isTop()) { newSource = std::dynamic_pointer_cast<TopSegmentIterator>(source)->clone(); } else { newSource = std::dynamic_pointer_cast<BottomSegmentIterator>(source)->clone(); } TopSegmentIteratorPtr newTop = topCopy->clone(); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, newTop)); assert(newMappedSeg->getGenome() == mappedSeg->getGenome()); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); results.push_back(newMappedSeg); ++added; if (topCopy->tseg()->hasNextParalogy()) { topCopy->toNextParalogy(); } } while (topCopy->tseg()->hasNextParalogy() == true && topCopy->getLength() >= minLength && topCopy->getArrayIndex() != top->getArrayIndex()); } else if (mappedSeg->getGenome()->getParent() != NULL) { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr bottom = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)bottom->getStartOffset(); hal_index_t endOffset = (hal_index_t)bottom->getEndOffset(); TopSegmentIteratorPtr top = mappedSeg->getGenome()->getTopSegmentIterator(); top->toParseUp(bottom); do { TopSegmentIteratorPtr topNew = top->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr bottomBack = bottom->clone(); bottomBack->toParseDown(topNew); hal_index_t startBack = (hal_index_t)bottomBack->getStartOffset(); hal_index_t endBack = (hal_index_t)bottomBack->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSource = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSource->getLength() > startDelta + endDelta); newSource->slice(newSource->getStartOffset() + startDelta, newSource->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, topNew)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapSelf(newMappedSeg, results, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (top->getEndPosition() != rightCutoff) { top->toRight(rightCutoff); } else { break; } } while (true); } return added; }