// Builds the fixture for the top-segment gap test: a root genome "parent1"
// and a leaf "child1", each with three sequences of five length-2 segments,
// initially aligned one-to-one (segment i <-> segment i). Three segments are
// then detached from their parents to model insertions / null parents.
void TopSegmentIsGapTest::createCallBack(Alignment *alignment) {
    const size_t numSequences = 3;

    Genome *parent1 = alignment->addRootGenome("parent1");
    Genome *child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Identical dimensions for both genomes.
    vector<Sequence::Info> seqVec(numSequences);
    for (size_t seqIdx = 0; seqIdx < numSequences; ++seqIdx) {
        seqVec[seqIdx] = Sequence::Info("Sequence" + std::to_string(seqIdx), 10, 5, 5);
    }
    parent1->setDimensions(seqVec);
    child1->setDimensions(seqVec);

    // Bottom segment i covers [2i, 2i + 2) and points at child segment i.
    BottomSegmentIteratorPtr bi;
    BottomSegmentStruct bs;
    bi = parent1->getBottomSegmentIterator();
    while (not bi->atEnd()) {
        bs.set(bi->getBottomSegment()->getArrayIndex() * 2, 2);
        bs._children.clear();
        bs._children.push_back(pair<hal_size_t, bool>(bi->getBottomSegment()->getArrayIndex(), false));
        bs.applyTo(bi);
        bi->toRight();
    }

    // Top segment i covers [2i, 2i + 2) and points at parent segment i.
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;
    ti = child1->getTopSegmentIterator();
    while (not ti->atEnd()) {
        ts.set(ti->getTopSegment()->getArrayIndex() * 2, 2, ti->getTopSegment()->getArrayIndex());
        ts.applyTo(ti);
        ti->toRight();
    }

    // Turns top segment `index` into an insertion: its parent link is
    // cleared, and bottom segment `index` is re-linked to top segment
    // `index + 1` (whose parent index becomes `index`).
    auto makeInsertion = [&](int index) {
        bi = parent1->getBottomSegmentIterator(index);
        ti = child1->getTopSegmentIterator(index);
        assert(bi->getBottomSegment()->getChildIndex(0) == index &&
               ti->getTopSegment()->getParentIndex() == index);
        bi->getBottomSegment()->setChildIndex(0, index + 1);
        ti->getTopSegment()->setParentIndex(NULL_INDEX);
        ti->toRight();
        ti->getTopSegment()->setParentIndex(index);
    };

    // insertion in the middle of a sequence (top segment 8)
    makeInsertion(8);
    // insertion at the beginning of a sequence (top segment 10)
    makeInsertion(10);

    // A null parent on its own (neighbours untouched) must not count as an
    // insertion.
    bi = parent1->getBottomSegmentIterator(2);
    ti = child1->getTopSegmentIterator(2);
    assert(bi->getBottomSegment()->getChildIndex(0) == 2 && ti->getTopSegment()->getParentIndex() == 2);
    ti->getTopSegment()->setParentIndex(NULL_INDEX);
}
// Builds the fixture for the mapped-segment duplication test: a 3-base root
// genome "parent" with a single bottom segment, and two 9-base leaves
// ("child1", "child2") with three top segments each. All three child1
// segments point (reversed) at parent segment 0 and are chained through
// their paralogy indices (0 -> 1 -> 2 -> 0); only the first child2 segment
// has a parent.
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;

    Genome *parent = alignment->addRootGenome("parent");
    Genome *child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome *child2 = alignment->addLeafGenome("child2", "parent", 1);

    // Dimensions: parent has one bottom segment, each child three top
    // segments.
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 3, 0, 1);
    parent->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
    child1->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
    child2->setDimensions(seqVec);

    parent->setString("CCC");
    child1->setString("CCCTACGTG");
    child2->setString("CCCTACGTG");

    // Parent segment 0: reversed edge to child1, forward edge to child2.
    BottomSegmentStruct bs;
    BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();
    bs.set(0, 3);
    bs._children.push_back(ChildLink(0, true));
    bs._children.push_back(ChildLink(0, false));
    bs.applyTo(bi);

    // child1: three reversed copies of parent segment 0, linked as a
    // paralogy cycle.
    TopSegmentStruct ts;
    TopSegmentIteratorPtr ti = child1->getTopSegmentIterator();
    ts.set(0, 3, 0, true, NULL_INDEX, 1);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 3, 0, true, NULL_INDEX, 2);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(6, 3, 0, true, NULL_INDEX, 0);
    ts.applyTo(ti);

    // child2: only the first segment is aligned to the parent.
    ti = child2->getTopSegmentIterator();
    ts.set(0, 3, 0, false);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 3, NULL_INDEX, true);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(6, 3, NULL_INDEX, false);
    ts.applyTo(ti);
}
void TopSegmentSimpleIteratorTest::checkCallBack(const Alignment *alignment) { const Genome *ancGenome = alignment->openGenome("Anc0"); CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == _topSegments.size()); TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0); for (size_t i = 0; i < ancGenome->getNumTopSegments(); ++i) { CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == i); _topSegments[i].compareTo(tsIt, _testCase); tsIt->toRight(); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); for (hal_index_t i = ancGenome->getNumTopSegments() - 1; i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getTopSegment()->getArrayIndex() == i); _topSegments[i].compareTo(tsIt, _testCase); tsIt->toLeft(); } tsIt = ancGenome->getTopSegmentIterator(0); tsIt->slice(0, tsIt->getLength() - 1); for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toRight(tsIt->getStartPosition() + 1); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); tsIt->slice(tsIt->getLength() - 1, 0); for (hal_index_t i = ancGenome->getSequenceLength() - 1; i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toLeft(tsIt->getStartPosition() - 1); } tsIt = ancGenome->getTopSegmentIterator(0); tsIt->toReverse(); CuAssertTrue(_testCase, tsIt->getReversed() == true); tsIt->slice(tsIt->getLength() - 1, 0); for (hal_index_t i = 0; i < (hal_index_t)ancGenome->getSequenceLength(); ++i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toLeft(tsIt->getStartPosition() + 1); } tsIt = ancGenome->getTopSegmentIterator(ancGenome->getNumTopSegments() - 1); tsIt->toReverse(); tsIt->slice(0, tsIt->getLength() - 1); for (hal_index_t i = ancGenome->getSequenceLength() - 1; 
i >= 0; --i) { CuAssertTrue(_testCase, tsIt->getLength() == 1); CuAssertTrue(_testCase, tsIt->getStartPosition() == i); tsIt->toRight(tsIt->getStartPosition() - 1); } }
// Builds the fixture for the simple top-segment iterator test: root genome
// "Anc0" with nine leaves, one sequence of 1,000,000 bases split into 5000
// top segments (and 10000 bottom segments). Each top segment gets random
// field values but an equal, contiguous slice of the sequence; the expected
// values are remembered in _topSegments for the check phase.
void TopSegmentSimpleIteratorTest::createCallBack(Alignment *alignment) {
    Genome *ancGenome = alignment->addRootGenome("Anc0", 0);

    const size_t numChildren = 9;
    for (size_t leaf = 0; leaf < numChildren; ++leaf) {
        alignment->addLeafGenome("Leaf" + std::to_string(leaf), "Anc0", 0.1);
    }

    vector<Sequence::Info> seqVec(1, Sequence::Info("Sequence", 1000000, 5000, 10000));
    ancGenome->setDimensions(seqVec);

    CuAssertTrue(_testCase, ancGenome->getNumChildren() == numChildren);

    // Generate the expected segment contents.
    _topSegments.clear();
    const auto numSegments = ancGenome->getNumTopSegments();
    const auto segmentLength = ancGenome->getSequenceLength() / numSegments;
    for (size_t idx = 0; idx < numSegments; ++idx) {
        TopSegmentStruct expected;
        expected.setRandom();
        // Overwrite the random coordinates so the segments tile the sequence.
        expected._length = segmentLength;
        expected._startPosition = idx * expected._length;
        _topSegments.push_back(expected);
    }

    // Write them into the genome in array order.
    TopSegmentIteratorPtr it = ancGenome->getTopSegmentIterator(0);
    size_t idx = 0;
    while (not it->atEnd()) {
        CuAssertTrue(_testCase, (size_t)it->getTopSegment()->getArrayIndex() == idx);
        _topSegments[idx].applyTo(it);
        it->toRight();
        ++idx;
    }
}
// Builds the fixture for the second gapped-segment iterator test. Starts
// from an identical parent/child pair (addIdenticalParentChild) and then
// calls makeInversion(ti, 5) at every other multiple-of-5 segment index
// (the toggle starts true, so index 0 is skipped), except at the very last
// bottom segment.
void GappedSegmentSimpleIteratorTest2::createCallBack(AlignmentPtr alignment) {
    addIdenticalParentChild(alignment, 2, 100, 5);

    Genome *parent = alignment->openGenome(alignment->getRootName());
    Genome *child = parent->getChild(0);

    TopSegmentIteratorPtr ti = child->getTopSegmentIterator();
    BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();

    // Flipped at each block boundary; an inversion is made only when the
    // flip lands on true.
    bool invertBlock = true;
    for (hal_index_t idx = 0; ti != child->getTopSegmentEndIterator(); ti->toRight(), bi->toRight(), ++idx) {
        if (idx % 5 != 0) {
            continue;
        }
        invertBlock = !invertBlock;
        if (invertBlock && idx < (hal_index_t)(parent->getNumBottomSegments() - 1)) {
            makeInversion(ti, 5);
        }
    }
}
void SequenceIteratorTest::checkCallBack(const Alignment *alignment) { const Genome *ancGenome = alignment->openGenome("AncGenome"); hal_size_t numSequences = ancGenome->getNumSequences(); CuAssertTrue(_testCase, numSequences = 1000); for (SequenceIteratorPtr seqIt = ancGenome->getSequenceIterator(); not seqIt->atEnd(); seqIt->toNext()) { const Sequence *seq = seqIt->getSequence(); hal_size_t i = seq->getArrayIndex(); TopSegmentIteratorPtr tsIt = seq->getTopSegmentIterator(); hal_size_t numTopSegments = seq->getNumTopSegments(); for (hal_size_t j = 0; j < numTopSegments; ++j) { TopSegmentIteratorPtr gtsIt = ancGenome->getTopSegmentIterator((i - 1) * 100 + j); const TopSegment *gsTopSegment = gtsIt->getTopSegment(); const TopSegment *sqTopSegment = tsIt->getTopSegment(); CuAssertTrue(_testCase, gsTopSegment->getArrayIndex() == sqTopSegment->getArrayIndex()); tsIt->toRight(); } BottomSegmentIteratorPtr bsIt = seq->getBottomSegmentIterator(); hal_size_t numBottomSegments = seq->getNumBottomSegments(); for (hal_size_t j = 0; j < numBottomSegments; ++j) { BottomSegmentIteratorPtr gbsIt = ancGenome->getBottomSegmentIterator((i - 1) * 100 + j); const BottomSegment *gsBottomSegment = gbsIt->getBottomSegment(); const BottomSegment *sqBottomSegment = bsIt->getBottomSegment(); CuAssertTrue(_testCase, gsBottomSegment->getArrayIndex() == sqBottomSegment->getArrayIndex()); bsIt->toRight(); } } }
void Genome::fixParseInfo() { if (getParent() == NULL || getNumChildren() == 0) { return; } // copied from CactusHalConverter::updateRootParseInfo() in // cactus2hal/src/cactusHalConverter.cpp BottomSegmentIteratorPtr bottomIterator = getBottomSegmentIterator(); TopSegmentIteratorPtr topIterator = getTopSegmentIterator(); BottomSegmentIteratorConstPtr bend = getBottomSegmentEndIterator(); TopSegmentIteratorConstPtr tend = getTopSegmentEndIterator(); int top = 0, bot = 0; while (bottomIterator != bend && topIterator != tend) { bool bright = false; bool tright = false; BottomSegment* bseg = bottomIterator->getBottomSegment(); TopSegment* tseg = topIterator->getTopSegment(); hal_index_t bstart = bseg->getStartPosition(); hal_index_t bendidx = bstart + (hal_index_t)bseg->getLength(); hal_index_t tstart = tseg->getStartPosition(); hal_index_t tendidx = tstart + (hal_index_t)tseg->getLength(); if (bstart >= tstart && bstart < tendidx) { bseg->setTopParseIndex(tseg->getArrayIndex()); } if (bendidx <= tendidx || bstart == bendidx) { bright = true; } if (tstart >= bstart && tstart < bendidx) { tseg->setBottomParseIndex(bseg->getArrayIndex()); } if (tendidx <= bendidx || tstart == tendidx) { tright = true; } assert(bright || tright); if (bright == true) { bot += 1; bottomIterator->toRight(); } if (tright == true) { top += 1; topIterator->toRight(); } } }
// Moves topSeqIt with toRight() past parentless top segments whose length
// does not exceed _gapThreshold. Stops at the first segment that has a
// parent or is longer than the threshold, or when the iterator sits on the
// edge of the array in its direction of travel (last segment when not
// reversed, first segment when reversed).
void GappedBottomSegmentIterator::toRightNextUngapped(TopSegmentIteratorPtr topSeqIt) const {
    for (;;) {
        const bool isGap = !topSeqIt->tseg()->hasParent() && topSeqIt->getLength() <= _gapThreshold;
        if (!isGap) {
            return;
        }
        const bool atEdge =
            topSeqIt->getReversed() ? topSeqIt->getTopSegment()->isFirst() : topSeqIt->getTopSegment()->isLast();
        if (atEdge) {
            return;
        }
        topSeqIt->toRight();
    }
}
// note: takes smart pointer as it maybe added to the results static hal_size_t mapUp(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, bool doDupes, hal_size_t minLength) { const Genome *parent = mappedSeg->getGenome()->getParent(); assert(parent != NULL); hal_size_t added = 0; if (mappedSeg->isTop() == true) { BottomSegmentIteratorPtr botSegIt = parent->getBottomSegmentIterator(); TopSegmentIteratorPtr topSegIt = mappedSeg->targetAsTop(); if (topSegIt->tseg()->hasParent() == true && topSegIt->getLength() >= minLength && (doDupes == true || topSegIt->tseg()->isCanonicalParalog() == true)) { botSegIt->toParent(topSegIt); mappedSeg->setTarget(std::dynamic_pointer_cast<SegmentIterator>(botSegIt)); results.push_back(mappedSeg); ++added; } } else { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr botSegIt = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)botSegIt->getStartOffset(); hal_index_t endOffset = (hal_index_t)botSegIt->getEndOffset(); TopSegmentIteratorPtr topSegIt = mappedSeg->getGenome()->getTopSegmentIterator(); topSegIt->toParseUp(botSegIt); do { TopSegmentIteratorPtr newTopSegIt = topSegIt->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr backBotSegIt = botSegIt->clone(); backBotSegIt->toParseDown(newTopSegIt); hal_index_t startBack = (hal_index_t)backBotSegIt->getStartOffset(); hal_index_t endBack = (hal_index_t)backBotSegIt->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSourceSegIt = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSourceSegIt->getLength() > startDelta + endDelta); newSourceSegIt->slice(newSourceSegIt->getStartOffset() + startDelta, newSourceSegIt->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSourceSegIt, newTopSegIt)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapUp(newMappedSeg, results, doDupes, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (topSegIt->getEndPosition() != rightCutoff) { topSegIt->toRight(rightCutoff); } else { break; } } while (true); } return added; }
// Set top segments to be equal width and so that segment 1, 2, 3, // etc. corresponds to parent segment 1, 2, 3, etc. void setTopSegments(Genome *genome, hal_size_t width) { TopSegmentIteratorPtr topIt = genome->getTopSegmentIterator(); hal_size_t n = genome->getNumTopSegments(); hal_index_t startPos = 0; for (; topIt->getArrayIndex() < n; topIt->toRight(), startPos += width) { topIt->setCoordinates(startPos, width); topIt->tseg()->setParentIndex(topIt->getArrayIndex()); topIt->tseg()->setParentReversed(false); topIt->tseg()->setBottomParseIndex(NULL_INDEX); topIt->tseg()->setNextParalogyIndex(NULL_INDEX); } }
// Copies the top segments of this genome into `dest`, segment by segment.
// `dest` must have either no top segments (nothing is copied) or exactly as
// many as this genome.
//
// Coordinates, parent orientation, bottom-parse index and next-paralogy
// index are copied verbatim. The parent index is re-based: it is translated
// from an offset within the parent sequence of this genome's parent to the
// same offset within the same-named sequence of dest's parent, so the link
// stays correct when the two parents order their sequences differently.
void Genome::copyTopSegments(Genome *dest) const {
    const Genome *srcParent = getParent();
    const Genome *dstParent = dest->getParent();
    TopSegmentIteratorConstPtr src = getTopSegmentIterator();
    TopSegmentIteratorPtr dst = dest->getTopSegmentIterator();

    const hal_size_t numSegments = dest->getNumTopSegments();
    assert(numSegments == 0 || numSegments == getNumTopSegments());
    if (numSegments == 0) {
        // No top segments: nothing to copy.
        return;
    }

    BottomSegmentIteratorConstPtr srcParentIt = srcParent->getBottomSegmentIterator();
    BottomSegmentIteratorConstPtr dstParentIt = dstParent->getBottomSegmentIterator();

    for (; (hal_size_t)src->getArrayIndex() < numSegments; src->toRight(), dst->toRight()) {
        const hal_index_t genomePos = src->getStartPosition();
        assert(genomePos != NULL_INDEX);

        // These names only feed the consistency check below, which is
        // currently disabled; the lookups themselves are still performed.
        string inSeqName = getSequenceBySite(genomePos)->getName();
        string outSeqName = dest->getSequenceBySite(genomePos)->getName();
        // if (inSeqName != outSeqName) {
        //     stringstream ss;
        //     ss << "When copying top segments from " << getName() << " to " << dest->getName()
        //        << ": sequence " << inSeqName << " != " << outSeqName << " at site " << genomePos;
        //     throw hal_exception(ss.str());
        // }

        const auto parentIndex = src->getParentIndex();
        dst->setCoordinates(genomePos, src->getLength());
        dst->setParentIndex(parentIndex);
        dst->setParentReversed(src->getParentReversed());
        dst->setBottomParseIndex(src->getBottomParseIndex());
        dst->setNextParalogyIndex(src->getNextParalogyIndex());

        if (parentIndex == NULL_INDEX) {
            continue;
        }

        // Make the parent link refer to the same sequence (by name) and the
        // same offset within it in dest's parent.
        srcParentIt->toParent(src);
        const Sequence *srcParentSeq = srcParentIt->getSequence();
        const Sequence *dstParentSeq = dstParent->getSequence(srcParentSeq->getName());
        const hal_index_t offsetInSequence = parentIndex - srcParentSeq->getBottomSegmentArrayIndex();
        const hal_index_t rebasedIndex = offsetInSequence + dstParentSeq->getBottomSegmentArrayIndex();
        dst->setParentIndex(rebasedIndex);
    }
}
int MappedSegment::boundComp(const SegmentIteratorPtr &s1, const SegmentIteratorPtr &s2) { int res = 0; bool flip = s2->getReversed(); if (flip) { s2->toReverse(); } if (s1->isTop() && !s2->isTop()) { BottomSegmentIteratorPtr bot = std::dynamic_pointer_cast<BottomSegmentIterator>(s2); hal_index_t lb = bot->bseg()->getTopParseIndex(); hal_index_t ub = lb; if ((hal_size_t)bot->getArrayIndex() < bot->getGenome()->getNumBottomSegments() - 1) { bot = bot->clone(); bot->slice(0, 0); bot->toRight(); ub = bot->bseg()->getTopParseIndex(); } if (s1->getArrayIndex() < lb) { res = -1; } else if (s1->getArrayIndex() > ub) { res = 1; } } else if (!s1->isTop() && s2->isTop()) { TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(s2); hal_index_t lb = top->tseg()->getBottomParseIndex(); hal_index_t ub = lb; if ((hal_size_t)top->getArrayIndex() < top->getGenome()->getNumTopSegments() - 1) { top = top->clone(); top->slice(0, 0); top->toRight(); ub = top->tseg()->getBottomParseIndex(); } if (s1->getArrayIndex() < lb) { res = -1; } else if (s1->getArrayIndex() > ub) { res = 1; } } if (flip) { s2->toReverse(); } return res; }
// Builds the fixture for the simple gapped-segment iterator test. Starts
// from an identical parent/child pair (addIdenticalParentChild) and marks
// every odd-numbered segment pair (1, 3, 5, ...) as reversed on both the
// child's top segment and the parent's bottom segment.
void GappedSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment) {
    addIdenticalParentChild(alignment, 2, 100, 5);

    Genome *parent = alignment->openGenome(alignment->getRootName());
    Genome *child = parent->getChild(0);

    TopSegmentIteratorPtr ti = child->getTopSegmentIterator();
    BottomSegmentIteratorPtr bi = parent->getBottomSegmentIterator();

    for (int idx = 0; ti != child->getTopSegmentEndIterator(); ++idx, ti->toRight(), bi->toRight()) {
        const bool oddSegment = (idx % 2) != 0;
        if (oddSegment) {
            ti->getTopSegment()->setParentReversed(true);
            bi->getBottomSegment()->setChildReversed(0, true);
        }
    }
}
// Builds the fixture for the top-segment "to site" test using two root
// genomes:
//   case1 - a 10-base sequence split into two top segments (lengths 9, 1);
//   case2 - 1133 top segments with pseudo-random lengths in [1, 77], laid
//           out back to back.
void TopSegmentIteratorToSiteTest::createCallBack(AlignmentPtr alignment) {
    vector<Sequence::Info> seqVec(1);
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;

    // case 1: fixed two-segment layout. The genome pointer is scoped so it
    // cannot be used by accident afterwards.
    {
        Genome *case1 = alignment->addRootGenome("case1");
        seqVec[0] = Sequence::Info("Sequence", 10, 2, 0);
        case1->setDimensions(seqVec);
        ti = case1->getTopSegmentIterator();
        ts.set(0, 9);
        ts.applyTo(ti);
        ti->toRight();
        ts.set(9, 1);
        ts.applyTo(ti);
    }

    // case 2: bunch of random segments. Lengths are drawn first so the total
    // sequence length is known before the genome is dimensioned.
    const hal_size_t numSegs = 1133;
    vector<hal_size_t> segLens(numSegs);
    hal_size_t total = 0;
    for (size_t idx = 0; idx < numSegs; ++idx) {
        const hal_size_t len = rand() % 77 + 1;
        assert(len > 0);
        segLens[idx] = len;
        total += len;
    }

    Genome *case2 = alignment->addRootGenome("case2");
    seqVec[0] = Sequence::Info("Sequence", total, numSegs, 0);
    case2->setDimensions(seqVec);

    hal_index_t nextStart = 0;
    for (size_t idx = 0; idx < numSegs; ++idx) {
        ti = case2->getTopSegmentIterator((hal_index_t)idx);
        ts.set(nextStart, segLens[idx]);
        nextStart += segLens[idx];
        ts.applyTo(ti);
    }
}
void GenomeCopyTest::checkCallBack(const Alignment *alignment) { // FIXME: halAlignment->open() fails miserably but // openHalAlignmentReadOnly works? Probably some state isn't cleared // on close. AlignmentPtr tmp(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS)); _secondAlignment = tmp; const Genome *ancGenome = alignment->openGenome("AncGenome"); CuAssertTrue(_testCase, ancGenome->getName() == "AncGenome"); CuAssertTrue(_testCase, ancGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, ancGenome->getNumTopSegments() == 0); CuAssertTrue(_testCase, ancGenome->getNumBottomSegments() == 700000); const MetaData *ancMeta = ancGenome->getMetaData(); CuAssertTrue(_testCase, ancMeta->get("Young") == "Jeezy"); const Genome *leafGenome = alignment->openGenome("LeafGenome1"); string ancSeq = "CAT"; hal_index_t n = ancGenome->getSequenceLength(); DnaIteratorPtr dnaIt = ancGenome->getDnaIterator(); for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) { size_t i = dnaIt->getArrayIndex() % ancSeq.size(); CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]); } TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator(); n = leafGenome->getNumTopSegments(); for (; topIt->getArrayIndex() < n; topIt->toRight()) { CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex()); CuAssertTrue(_testCase, topIt->getLength() == 1); CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3); CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true); CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5); if (topIt->getArrayIndex() != 6) { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6); } else { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 7); } } BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator(); n = ancGenome->getNumBottomSegments(); for (; botIt->getArrayIndex() < n; botIt->toRight()) { CuAssertTrue(_testCase, botIt->getStartPosition() == 
botIt->getArrayIndex()); CuAssertTrue(_testCase, botIt->getLength() == 1); CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3); CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true); CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5); } const Genome *copyRootGenome = _secondAlignment->openGenome("copyRootGenome"); const Genome *copyLeafGenome = _secondAlignment->openGenome("LeafGenome1"); CuAssertTrue(_testCase, copyRootGenome->getName() == "copyRootGenome"); CuAssertTrue(_testCase, copyRootGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, copyRootGenome->getNumTopSegments() == 0); CuAssertTrue(_testCase, copyRootGenome->getNumBottomSegments() == 700000); CuAssertTrue(_testCase, copyLeafGenome->getName() == "LeafGenome1"); CuAssertTrue(_testCase, copyLeafGenome->getSequenceLength() == 1000000); CuAssertTrue(_testCase, copyLeafGenome->getNumTopSegments() == 5000); CuAssertTrue(_testCase, copyLeafGenome->getNumBottomSegments() == 0); const MetaData *copyMeta = copyRootGenome->getMetaData(); CuAssertTrue(_testCase, copyMeta->get("Young") == "Jeezy"); n = copyRootGenome->getSequenceLength(); dnaIt = copyRootGenome->getDnaIterator(); for (; dnaIt->getArrayIndex() < n; dnaIt->toRight()) { size_t i = dnaIt->getArrayIndex() % ancSeq.size(); CuAssertTrue(_testCase, dnaIt->getBase() == ancSeq[i]); } topIt = copyLeafGenome->getTopSegmentIterator(); n = copyLeafGenome->getNumTopSegments(); for (; topIt->getArrayIndex() < n; topIt->toRight()) { CuAssertTrue(_testCase, topIt->getStartPosition() == topIt->getArrayIndex()); CuAssertTrue(_testCase, topIt->getLength() == 1); CuAssertTrue(_testCase, topIt->tseg()->getParentIndex() == 3); CuAssertTrue(_testCase, topIt->tseg()->getParentReversed() == true); CuAssertTrue(_testCase, topIt->tseg()->getBottomParseIndex() == 5); if (topIt->getArrayIndex() != 6) { CuAssertTrue(_testCase, topIt->tseg()->getNextParalogyIndex() == 6); } else { CuAssertTrue(_testCase, 
topIt->tseg()->getNextParalogyIndex() == 7); } } botIt = copyRootGenome->getBottomSegmentIterator(); n = copyRootGenome->getNumBottomSegments(); for (; botIt->getArrayIndex() < n; botIt->toRight()) { CuAssertTrue(_testCase, botIt->getStartPosition() == botIt->getArrayIndex()); CuAssertTrue(_testCase, botIt->getLength() == 1); CuAssertTrue(_testCase, botIt->bseg()->getChildIndex(0) == 3); CuAssertTrue(_testCase, botIt->bseg()->getChildReversed(0) == true); CuAssertTrue(_testCase, botIt->bseg()->getTopParseIndex() == 5); } _secondAlignment->close(); remove(_path.c_str()); }
// Builds the fixture for the genome-copy test.
//
// The source genomes ("AncGenome" with 700000 bottom segments, "LeafGenome1"
// with 5000 top segments, both 1,000,000 bases of repeating "CAT") live in
// `alignment`. The destination genomes live in a second alignment stored at
// a temporary path and are deliberately filled with different dimensions,
// DNA ("TAG") and segment values, so a successful copy has to overwrite all
// of them. The function ends by copying source -> destination and closing
// the second alignment.
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    // Hacky: a separate alignment is needed to test copying the bottom
    // segments correctly (the names of a node's children are used when
    // copying bottom segments, and two genomes can't share a name within
    // one alignment).
    _path = getTempFile();
    _secondAlignment =
        AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);
    // These genomes exercise copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence and copyMetadata.
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    MetaData *ancMeta = ancGenome->getMetaData();
    ancMeta->set("Young", "Jeezy");

    // Writes `pattern` cyclically over the whole genome and flushes.
    auto fillDna = [](Genome *genome, const string &pattern) {
        const hal_index_t length = genome->getSequenceLength();
        DnaIteratorPtr dna = genome->getDnaIterator();
        for (; dna->getArrayIndex() < length; dna->toRight()) {
            const size_t phase = dna->getArrayIndex() % pattern.size();
            dna->setBase(pattern[phase]);
        }
        dna->flush();
    };

    // --- source genomes ----------------------------------------------------
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);

    const string ancSeq = "CAT";
    fillDna(ancGenome, ancSeq);
    fillDna(leafGenome, ancSeq);

    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    hal_index_t count = leafGenome->getNumTopSegments();
    while (topIt->getArrayIndex() < count) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        // Segment 6 must not name itself as its next paralog.
        if (topIt->getArrayIndex() == 6) {
            topIt->tseg()->setNextParalogyIndex(7);
        } else {
            topIt->tseg()->setNextParalogyIndex(6);
        }
        topIt->toRight();
    }

    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    count = ancGenome->getNumBottomSegments();
    while (botIt->getArrayIndex() < count) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
        botIt->toRight();
    }

    // --- destination genomes, pre-filled with different data ----------------
    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);

    const string copySeq = "TAG";
    fillDna(copyRootGenome, copySeq);
    fillDna(copyLeafGenome, copySeq);

    topIt = copyLeafGenome->getTopSegmentIterator();
    count = copyLeafGenome->getNumTopSegments();
    while (topIt->getArrayIndex() < count) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        if (topIt->getArrayIndex() == 12) {
            topIt->tseg()->setNextParalogyIndex(7);
        } else {
            topIt->tseg()->setNextParalogyIndex(12);
        }
        topIt->toRight();
    }

    botIt = copyRootGenome->getBottomSegmentIterator();
    count = copyRootGenome->getNumBottomSegments();
    while (botIt->getArrayIndex() < count) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
        botIt->toRight();
    }

    // --- the operation under test ------------------------------------------
    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}
// note: takes smart pointer as it maybe added to the results static hal_size_t mapSelf(MappedSegmentPtr mappedSeg, list<MappedSegmentPtr> &results, hal_size_t minLength) { hal_size_t added = 0; if (mappedSeg->isTop() == true) { SegmentIteratorPtr target = mappedSeg->getTargetIteratorPtr(); SegmentIteratorPtr source = mappedSeg->getSourceIteratorPtr(); TopSegmentIteratorPtr top = std::dynamic_pointer_cast<TopSegmentIterator>(target); TopSegmentIteratorPtr topCopy = top->clone(); do { // FIXME: why isn't clone() polymorphic? SegmentIteratorPtr newSource; if (source->isTop()) { newSource = std::dynamic_pointer_cast<TopSegmentIterator>(source)->clone(); } else { newSource = std::dynamic_pointer_cast<BottomSegmentIterator>(source)->clone(); } TopSegmentIteratorPtr newTop = topCopy->clone(); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, newTop)); assert(newMappedSeg->getGenome() == mappedSeg->getGenome()); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); results.push_back(newMappedSeg); ++added; if (topCopy->tseg()->hasNextParalogy()) { topCopy->toNextParalogy(); } } while (topCopy->tseg()->hasNextParalogy() == true && topCopy->getLength() >= minLength && topCopy->getArrayIndex() != top->getArrayIndex()); } else if (mappedSeg->getGenome()->getParent() != NULL) { hal_index_t rightCutoff = mappedSeg->getEndPosition(); BottomSegmentIteratorPtr bottom = mappedSeg->targetAsBottom(); hal_index_t startOffset = (hal_index_t)bottom->getStartOffset(); hal_index_t endOffset = (hal_index_t)bottom->getEndOffset(); TopSegmentIteratorPtr top = mappedSeg->getGenome()->getTopSegmentIterator(); top->toParseUp(bottom); do { TopSegmentIteratorPtr topNew = top->clone(); // we map the new target back to see how the offsets have // changed. 
these changes are then applied to the source segment // as deltas BottomSegmentIteratorPtr bottomBack = bottom->clone(); bottomBack->toParseDown(topNew); hal_index_t startBack = (hal_index_t)bottomBack->getStartOffset(); hal_index_t endBack = (hal_index_t)bottomBack->getEndOffset(); assert(startBack >= startOffset); assert(endBack >= endOffset); SegmentIteratorPtr newSource = mappedSeg->sourceClone(); hal_index_t startDelta = startBack - startOffset; hal_index_t endDelta = endBack - endOffset; assert((hal_index_t)newSource->getLength() > startDelta + endDelta); newSource->slice(newSource->getStartOffset() + startDelta, newSource->getEndOffset() + endDelta); MappedSegmentPtr newMappedSeg(new MappedSegment(newSource, topNew)); assert(newMappedSeg->isTop() == true); assert(newMappedSeg->getSource()->getGenome() == mappedSeg->getSource()->getGenome()); added += mapSelf(newMappedSeg, results, minLength); // stupid that we have to make this check but odn't want to // make fundamental api change now if (top->getEndPosition() != rightCutoff) { top->toRight(rightCutoff); } else { break; } } while (true); } return added; }
// Builds the fixture for the "extra paralogs" mapping test: a three-level
// tree root -> parent -> {grandChild1, grandChild2}.
//
// All segments of grandChild1 coalesce with the first segment of
// grandChild2, but only when the root is used as the coalescence limit
// (the three parent top segments are paralogs of root segment 0).
// Otherwise only the first segments map to each other.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;

    Genome *root = alignment->addRootGenome("root");
    Genome *parent = alignment->addLeafGenome("parent", "root", 1);
    Genome *grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
    Genome *grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);

    // Dimensions and DNA.
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 3, 0, 1);
    root->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 9, 3, 3);
    parent->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
    grandChild1->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 9, 3, 0);
    grandChild2->setDimensions(seqVec);

    root->setString("CCC");
    parent->setString("CCCTACGTG");
    grandChild1->setString("CCCTACGTG");
    grandChild2->setString("CCCTACGTG");

    BottomSegmentStruct bs;
    TopSegmentStruct ts;

    // root: a single bottom segment with a forward edge to parent segment 0.
    BottomSegmentIteratorPtr bi = root->getBottomSegmentIterator();
    bs.set(0, 3);
    bs._children.push_back(ChildLink(0, false));
    bs.applyTo(bi);

    // parent (top side): three copies of root segment 0 chained as a
    // paralogy cycle 0 -> 1 -> 2 -> 0.
    TopSegmentIteratorPtr ti = parent->getTopSegmentIterator();
    ts.set(0, 3, 0, false, NULL_INDEX, 1);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 3, 0, false, NULL_INDEX, 2);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(6, 3, 0, false, NULL_INDEX, 0);
    ts.applyTo(ti);

    // parent (bottom side): segment k points (reversed) at grandChild1
    // segment k; only segment 0 has a child in grandChild2.
    bi = parent->getBottomSegmentIterator();
    bs.set(0, 3);
    bs._children.clear();
    bs._children.push_back(ChildLink(0, true));
    bs._children.push_back(ChildLink(0, false));
    bs.applyTo(bi);
    bi->toRight();
    bs.set(3, 3);
    bs._children.clear();
    bs._children.push_back(ChildLink(1, true));
    bs._children.push_back(ChildLink(NULL_INDEX, true));
    bs.applyTo(bi);
    bi->toRight();
    bs.set(6, 3);
    bs._children.clear();
    bs._children.push_back(ChildLink(2, true));
    bs._children.push_back(ChildLink(NULL_INDEX, false));
    bs.applyTo(bi);

    // grandChild1: every segment aligned (reversed) to the matching parent
    // segment.
    ti = grandChild1->getTopSegmentIterator();
    ts.set(0, 3, 0, true);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 3, 1, true);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(6, 3, 2, true);
    ts.applyTo(ti);

    // grandChild2: only the first segment is aligned.
    ti = grandChild2->getTopSegmentIterator();
    ts.set(0, 3, 0, false);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 3, NULL_INDEX, true);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(6, 3, NULL_INDEX, false);
    ts.applyTo(ti);

    // parent has both top and bottom segments, so its parse indices must be
    // computed.
    parent->fixParseInfo();
}
// Builds the fixture for the top-segment parse test: four independent root
// genomes, each a 10-base sequence whose top and bottom segments overlap in
// a different way.
void TopSegmentIteratorParseTest::createCallBack(AlignmentPtr alignment) {
    BottomSegmentIteratorPtr bi;
    BottomSegmentStruct bs;
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;

    // Adds a root genome `name` holding one 10-base sequence with the given
    // numbers of top and bottom segments.
    auto addCase = [&alignment](const char *name, int numTop, int numBottom) -> Genome * {
        Genome *genome = alignment->addRootGenome(name);
        vector<Sequence::Info> dims(1);
        dims[0] = Sequence::Info("Sequence", 10, numTop, numBottom);
        genome->setDimensions(dims);
        return genome;
    };

    // case 1: bottom segment aligns perfectly with top segment
    Genome *case1 = addCase("case1", 2, 2);
    ti = case1->getTopSegmentIterator();
    ts.set(0, 10, NULL_INDEX, false, 0, NULL_INDEX);
    ts.applyTo(ti);
    bi = case1->getBottomSegmentIterator();
    bs.set(0, 10, 0);
    bs.applyTo(bi);

    // case 2: bottom segments are completely contained in the top segment
    Genome *case2 = addCase("case2", 2, 3);
    ti = case2->getTopSegmentIterator();
    ts.set(0, 9, NULL_INDEX, false, 0, NULL_INDEX);
    ts.applyTo(ti);
    bi = case2->getBottomSegmentIterator();
    bs.set(0, 3, 0);
    bs.applyTo(bi);
    bi->toRight();
    bs.set(3, 4, 0);
    bs.applyTo(bi);
    bi->toRight();
    bs.set(7, 3, 0);
    bs.applyTo(bi);

    // case 3: top segments are completely contained in the bottom segment
    Genome *case3 = addCase("case3", 3, 2);
    ti = case3->getTopSegmentIterator();
    ts.set(0, 3, NULL_INDEX, false, 0);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(3, 4, NULL_INDEX, false, 0);
    ts.applyTo(ti);
    ti->toRight();
    ts.set(7, 3, NULL_INDEX, false, 0);
    ts.applyTo(ti);
    bi = case3->getBottomSegmentIterator();
    bs.set(0, 9, 0);
    bs.applyTo(bi);

    // case 4: one top segment spanning two bottom segments (the original
    // note describes this as the top segment overhanging the bottom segment)
    Genome *case4 = addCase("case4", 2, 2);
    ti = case4->getTopSegmentIterator();
    ts.set(0, 9, NULL_INDEX, false, 0);
    ts.applyTo(ti);
    bi = case4->getBottomSegmentIterator();
    bs.set(0, 5, 0);
    bs.applyTo(bi);
    bi->toRight();
    bs.set(5, 5, 0);
    bs.applyTo(bi);
}