/**
 * Build the alignment used by the simple top-segment iterator test.
 *
 * Creates a root genome "Anc0" with nine leaf children, gives the root a
 * single sequence, then fills `_topSegments` with one randomized
 * TopSegmentStruct per top segment of the root (equal lengths, back-to-back
 * start positions) and writes each of them into the genome through a
 * top-segment iterator.
 *
 * @param alignment alignment to populate.
 */
void TopSegmentSimpleIteratorTest::createCallBack(Alignment *alignment) {
    Genome *ancGenome = alignment->addRootGenome("Anc0", 0);

    // Hang the leaves "Leaf0" .. "Leaf8" directly off the root.
    const size_t numChildren = 9;
    for (size_t childIdx = 0; childIdx < numChildren; ++childIdx) {
        alignment->addLeafGenome("Leaf" + std::to_string(childIdx), "Anc0", 0.1);
    }

    // NOTE(review): Sequence::Info arguments are presumably
    // (name, length, #top segments, #bottom segments) -- confirm.
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 10000);
    ancGenome->setDimensions(seqVec);

    CuAssertTrue(_testCase, ancGenome->getNumChildren() == numChildren);

    // Record the expected contents of every top segment.
    _topSegments.clear();
    size_t segIdx = 0;
    while (segIdx < ancGenome->getNumTopSegments()) {
        TopSegmentStruct expected;
        expected.setRandom();
        // Overwrite the random coordinates so the segments tile the sequence.
        expected._length = ancGenome->getSequenceLength() / ancGenome->getNumTopSegments();
        expected._startPosition = segIdx * expected._length;
        _topSegments.push_back(expected);
        ++segIdx;
    }

    // Walk the genome left to right and store the expected values in it.
    TopSegmentIteratorPtr tsIt = ancGenome->getTopSegmentIterator(0);
    size_t position = 0;
    while (!tsIt->atEnd()) {
        CuAssertTrue(_testCase, (size_t)tsIt->getTopSegment()->getArrayIndex() == position);
        _topSegments[position].applyTo(tsIt);
        tsIt->toRight();
        ++position;
    }
}
// Test copying when the sequences aren't in the same order. // // Create an alignment with "Sequence1" positions aligned to // "Sequence1" positions, and "Sequence2" to "Sequence2", but try // copying the segments to an alignment with "Sequence2" before // "Sequence1" in the ordering. void GenomeCopySegmentsWhenSequencesOutOfOrderTest::createCallBack(Alignment *alignment) { hal_size_t alignmentSize = alignment->getNumGenomes(); CuAssertTrue(_testCase, alignmentSize == 0); // Hacky: Need a different alignment to test copying the bottom // segments correctly. (the names of a node's children are used // when copying bottom segments, and two genomes can't have the same // name in the same alignment) _path = getTempFile(); _secondAlignment = AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, CREATE_ACCESS)); Genome *rootGenome = alignment->addRootGenome("root", 0); Genome *internalGenome = alignment->addLeafGenome("internal", "root", 0); Genome *leaf1Genome = alignment->addLeafGenome("leaf1", "root", 0); Genome *leaf2Genome = alignment->addLeafGenome("leaf2", "internal", 0); Genome *copyRootGenome = _secondAlignment->addRootGenome("root", 0); Genome *copyInternalGenome = _secondAlignment->addLeafGenome("internal", "root", 0); Genome *copyLeaf1Genome = _secondAlignment->addLeafGenome("leaf1", "root", 0); Genome *copyLeaf2Genome = _secondAlignment->addLeafGenome("leaf2", "internal", 0); vector<Sequence::Info> seqVec(2); seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17); rootGenome->setDimensions(seqVec); rootGenome->setString(randomString(rootGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 17, 17); internalGenome->setDimensions(seqVec); internalGenome->setString(randomString(internalGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0); seqVec[1] = Sequence::Info("Sequence2", 170, 
17, 0); leaf1Genome->setDimensions(seqVec); leaf1Genome->setString(randomString(leaf1Genome->getSequenceLength())); leaf2Genome->setDimensions(seqVec); leaf2Genome->setString(randomString(leaf2Genome->getSequenceLength())); setTopSegments(internalGenome, 10); setTopSegments(leaf1Genome, 10); setTopSegments(leaf2Genome, 10); setBottomSegments(rootGenome, 10); setBottomSegments(internalGenome, 10); rootGenome->fixParseInfo(); internalGenome->fixParseInfo(); leaf1Genome->fixParseInfo(); leaf2Genome->fixParseInfo(); seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17); copyRootGenome->setDimensions(seqVec); copyRootGenome->setString(randomString(copyRootGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0); seqVec[1] = Sequence::Info("Sequence2", 170, 17, 0); copyLeaf1Genome->setDimensions(seqVec); copyLeaf2Genome->setDimensions(seqVec); copyLeaf1Genome->setString(randomString(copyLeaf1Genome->getSequenceLength())); copyLeaf2Genome->setString(randomString(copyLeaf2Genome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence2", 170, 17, 17); seqVec[1] = Sequence::Info("Sequence1", 130, 13, 13); copyInternalGenome->setDimensions(seqVec); copyInternalGenome->setString(randomString(copyInternalGenome->getSequenceLength())); rootGenome->copyBottomDimensions(copyRootGenome); rootGenome->copyBottomSegments(copyRootGenome); copyRootGenome->fixParseInfo(); internalGenome->copyBottomDimensions(copyInternalGenome); internalGenome->copyBottomSegments(copyInternalGenome); internalGenome->copyTopDimensions(copyInternalGenome); internalGenome->copyTopSegments(copyInternalGenome); copyInternalGenome->fixParseInfo(); leaf1Genome->copyTopDimensions(copyLeaf1Genome); leaf1Genome->copyTopSegments(copyLeaf1Genome); copyLeaf1Genome->fixParseInfo(); leaf2Genome->copyTopDimensions(copyLeaf2Genome); leaf2Genome->copyTopSegments(copyLeaf2Genome); copyLeaf2Genome->fixParseInfo(); _secondAlignment->close(); }
/**
 * Build the fixtures for the genome copy test.
 *
 * A two-genome tree ("AncGenome" -> "LeafGenome1") is created in `alignment`
 * and filled with a repeating "CAT" sequence and fixed segment values. A
 * second two-genome tree ("copyRootGenome" -> "LeafGenome1") is created in
 * `_secondAlignment` with different dimensions, a repeating "TAG" sequence
 * and different segment values. Finally each source genome is copied onto
 * its counterpart with Genome::copy() and the second alignment is closed.
 *
 * @param alignment alignment to populate; asserted to be empty on entry.
 */
void GenomeCopyTest::createCallBack(Alignment *alignment) {
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    // Hacky: Need a different alignment to test copying the bottom
    // segments correctly. (the names of a node's children are used
    // when copying bottom segments, and two genomes can't have the same
    // name in the same alignment)
    _path = getTempFile();
    _secondAlignment = AlignmentPtr(
        getTestAlignmentInstances(alignment->getStorageFormat(), _path, WRITE_ACCESS | CREATE_ACCESS));

    Genome *ancGenome = alignment->addRootGenome("AncGenome", 0);
    Genome *leafGenome = alignment->addLeafGenome("LeafGenome1", "AncGenome", 0);

    // This genome will test copyDimensions, copyTopSegments,
    // copyBottomSegments, copySequence, copyMetadata
    Genome *copyRootGenome = _secondAlignment->addRootGenome("copyRootGenome", 0);
    Genome *copyLeafGenome = _secondAlignment->addLeafGenome("LeafGenome1", "copyRootGenome", 0);

    ancGenome->getMetaData()->set("Young", "Jeezy");

    // Writes `motif` cyclically over every base the iterator visits before
    // its array index reaches `length`, then flushes the iterator. The
    // iterator is taken by reference so the caller keeps owning it.
    auto paintBases = [](DnaIteratorPtr &it, hal_index_t length, const string &motif) {
        while (it->getArrayIndex() < length) {
            it->setBase(motif[it->getArrayIndex() % motif.size()]);
            it->toRight();
        }
        it->flush();
    };

    // ---- source alignment ----
    // NOTE(review): Sequence::Info arguments are presumably
    // (name, length, #top segments, #bottom segments) -- confirm.
    vector<Sequence::Info> seqVec(1);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 0, 700000);
    ancGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 1000000, 5000, 0);
    leafGenome->setDimensions(seqVec);

    const string ancSeq = "CAT";
    const hal_index_t ancLength = ancGenome->getSequenceLength();
    DnaIteratorPtr dnaIt = ancGenome->getDnaIterator();
    paintBases(dnaIt, ancLength, ancSeq);

    const hal_index_t leafLength = leafGenome->getSequenceLength();
    dnaIt = leafGenome->getDnaIterator();
    paintBases(dnaIt, leafLength, ancSeq);

    // Every source top segment gets the same parent/parse values; the
    // paralogy index is 6 everywhere except on segment 6 itself, which gets 7.
    TopSegmentIteratorPtr topIt = leafGenome->getTopSegmentIterator();
    const hal_index_t leafTopCount = leafGenome->getNumTopSegments();
    while (topIt->getArrayIndex() < leafTopCount) {
        topIt->setCoordinates(topIt->getArrayIndex(), 1);
        topIt->tseg()->setParentIndex(3);
        topIt->tseg()->setParentReversed(true);
        topIt->tseg()->setBottomParseIndex(5);
        topIt->tseg()->setNextParalogyIndex(topIt->getArrayIndex() != 6 ? 6 : 7);
        topIt->toRight();
    }

    BottomSegmentIteratorPtr botIt = ancGenome->getBottomSegmentIterator();
    const hal_index_t ancBottomCount = ancGenome->getNumBottomSegments();
    while (botIt->getArrayIndex() < ancBottomCount) {
        botIt->setCoordinates(botIt->getArrayIndex(), 1);
        botIt->bseg()->setChildIndex(0, 3);
        botIt->bseg()->setChildReversed(0, true);
        botIt->bseg()->setTopParseIndex(5);
        botIt->toRight();
    }

    // ---- destination alignment: deliberately different contents ----
    seqVec[0] = Sequence::Info("Sequence", 3300, 0, 1100);
    copyRootGenome->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 3300, 2200, 0);
    copyLeafGenome->setDimensions(seqVec);

    const string copySeq = "TAG";
    dnaIt = copyRootGenome->getDnaIterator();
    paintBases(dnaIt, copyRootGenome->getSequenceLength(), copySeq);

    dnaIt = copyLeafGenome->getDnaIterator();
    paintBases(dnaIt, copyLeafGenome->getSequenceLength(), copySeq);

    // Paralogy index is 12 everywhere except on segment 12 itself, which gets 7.
    topIt = copyLeafGenome->getTopSegmentIterator();
    const hal_index_t copyTopCount = copyLeafGenome->getNumTopSegments();
    while (topIt->getArrayIndex() < copyTopCount) {
        topIt->setCoordinates(7, 8);
        topIt->tseg()->setParentIndex(9);
        topIt->tseg()->setParentReversed(false);
        topIt->tseg()->setBottomParseIndex(11);
        topIt->tseg()->setNextParalogyIndex(topIt->getArrayIndex() != 12 ? 12 : 7);
        topIt->toRight();
    }

    botIt = copyRootGenome->getBottomSegmentIterator();
    const hal_index_t copyBottomCount = copyRootGenome->getNumBottomSegments();
    while (botIt->getArrayIndex() < copyBottomCount) {
        botIt->setCoordinates(6, 7);
        botIt->bseg()->setChildIndex(0, 8);
        botIt->bseg()->setChildReversed(0, false);
        botIt->bseg()->setTopParseIndex(10);
        botIt->toRight();
    }

    // Copy each source genome onto its counterpart in the second alignment.
    ancGenome->copy(copyRootGenome);
    leafGenome->copy(copyLeafGenome);
    _secondAlignment->close();
}
/**
 * Build the alignment used by the mapped-segment "map up" test.
 *
 * Tree shape:
 *   parent -> child1 -> gi1 -> gi2 -> gi3 -> gi4 -> gi5
 *          -> child2 -> g1  -> g2  -> g3  -> g4  -> g5
 *
 * parent has a single bottom segment whose edge to child1 is reversed and
 * whose edge to child2 is not. The g* chain is wired with no inversions; the
 * gi* chain flags an edge as inverted whenever `rand() % 4 == 0`.
 *
 * @param alignment alignment to populate.
 */
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment) {
    vector<Sequence::Info> seqVec(1);
    BottomSegmentIteratorPtr bi;
    BottomSegmentStruct bs;
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;

    // setup simple case were there is an edge from a parent to
    // child1 and it is reversed and nonreversed to child2
    Genome *parent = alignment->addRootGenome("parent");
    Genome *child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome *child2 = alignment->addLeafGenome("child2", "parent", 1);

    // add a bunch of grandchildren with no rearrangemnts to test
    // simple parsing
    Genome *g1 = alignment->addLeafGenome("g1", "child2", 1);
    Genome *g2 = alignment->addLeafGenome("g2", "g1", 1);
    Genome *g3 = alignment->addLeafGenome("g3", "g2", 1);
    Genome *g4 = alignment->addLeafGenome("g4", "g3", 1);
    Genome *g5 = alignment->addLeafGenome("g5", "g4", 1);

    // add some with random inversions
    Genome *gi1 = alignment->addLeafGenome("gi1", "child1", 1);
    Genome *gi2 = alignment->addLeafGenome("gi2", "gi1", 1);
    Genome *gi3 = alignment->addLeafGenome("gi3", "gi2", 1);
    Genome *gi4 = alignment->addLeafGenome("gi4", "gi3", 1);
    Genome *gi5 = alignment->addLeafGenome("gi5", "gi4", 1);

    Genome *gs[] = {g1, g2, g3, g4, g5};
    Genome *gis[] = {gi1, gi2, gi3, gi4, gi5};
    Genome **chains[] = {gs, gis};

    // Dimensions. NOTE(review): Sequence::Info arguments are presumably
    // (name, length, #top segments, #bottom segments) -- confirm.
    seqVec[0] = Sequence::Info("Sequence", 12, 0, 1);
    parent->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
    child1->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
    child2->setDimensions(seqVec);

    // Third/fourth Info arguments for depth 1..5 of either chain; each
    // level's third value equals the fourth value of the level above it.
    const int chainDims[5][2] = {{6, 4}, {4, 3}, {3, 2}, {2, 12}, {12, 0}};
    for (size_t c = 0; c < 2; ++c) {
        for (size_t depth = 0; depth < 5; ++depth) {
            seqVec[0] = Sequence::Info("Sequence", 12, chainDims[depth][0], chainDims[depth][1]);
            chains[c][depth]->setDimensions(seqVec);
        }
    }

    // DNA: the three top genomes share one string, all descendants another.
    parent->setString("CCCTACTTGTGC");
    child1->setString("CCCTACTTGTGC");
    child2->setString("CCCTACTTGTGC");
    for (size_t depth = 0; depth < 5; ++depth) {
        gs[depth]->setString("TCCTACTTGTGC");
        gis[depth]->setString("TCCTACTTGTGC");
    }

    // parent's single bottom segment: reversed edge to child 0 (child1),
    // forward edge to child 1 (child2).
    bi = parent->getBottomSegmentIterator();
    bs.set(0, 12);
    bs._children.push_back(pair<hal_size_t, bool>(0, true));
    bs._children.push_back(pair<hal_size_t, bool>(0, false));
    bs.applyTo(bi);

    ti = child1->getTopSegmentIterator();
    ts.set(0, 12, 0, true, 0);
    ts.applyTo(ti);

    ti = child2->getTopSegmentIterator();
    ts.set(0, 12, 0, false, 0);
    ts.applyTo(ti);

    // Six length-2 bottom segments in each child, matched one-to-one and
    // unreversed with the six top segments of its first descendant.
    // child2/g1 is wired first, then child1/gi1.
    Genome *firstLinks[2][2] = {{child2, g1}, {child1, gi1}};
    for (size_t link = 0; link < 2; ++link) {
        Genome *upper = firstLinks[link][0];
        Genome *lower = firstLinks[link][1];
        for (size_t i = 0; i < 6; ++i) {
            bi = upper->getBottomSegmentIterator(i);
            bs.set(i * 2, 2, 0);
            bs._children.clear();
            bs._children.push_back(pair<hal_size_t, bool>(i, false));
            bs.applyTo(bi);

            ti = lower->getTopSegmentIterator(i);
            ts.set(i * 2, 2, i, false);
            ts.applyTo(ti);
        }
    }

    // Wire every genome of both chains to its own parent: first the g* chain
    // (never inverted), then the gi* chain (randomly inverted).
    for (size_t c = 0; c < 2; ++c) {
        const bool randomInversions = (c == 1);
        for (size_t depth = 0; depth < 5; ++depth) {
            const Genome *cur = chains[c][depth];
            const Genome *above = cur->getParent();
            // The last genome of a chain has no child.
            const Genome *below = depth == 4 ? nullptr : cur->getChild(0);

            hal_size_t segLen = cur->getSequenceLength() / cur->getNumTopSegments();
            hal_size_t psegLen = above->getSequenceLength() / above->getNumTopSegments();
            hal_size_t csegLen = 0;
            if (below) {
                csegLen = below->getSequenceLength() / below->getNumTopSegments();
            }

            for (size_t j = 0; j < cur->getNumTopSegments(); ++j) {
                // rand() is consumed only for the gi* chain, once per segment.
                bool inv = randomInversions && (rand() % 4 == 0);

                bi = above->getBottomSegmentIterator(j);
                bs.set(j * segLen, segLen, (j * segLen) / psegLen);
                bs._children.clear();
                bs._children.push_back(pair<hal_size_t, bool>(j, inv));
                bs.applyTo(bi);

                // Parse index stays NULL_INDEX when there is no child genome.
                hal_index_t bparse = NULL_INDEX;
                if (below) {
                    bparse = (j * segLen) / csegLen;
                }

                ti = cur->getTopSegmentIterator(j);
                ts.set(j * segLen, segLen, j, inv, bparse);
                ts.applyTo(ti);
            }
        }
    }
}