// Builds the fixture for TopSegmentIsGapTest.
//
// A root genome ("parent1") and a leaf genome ("child1") each receive three
// sequences described by Sequence::Info(name, 10, 5, 5); per the original
// notes that is five segments of length two per sequence. Bottom segment i of
// the parent is first aligned with top segment i of the child, then three
// child top segments have their parent index set to NULL_INDEX:
//   - segment 8  ("insertion in middle"),
//   - segment 10 ("insertion at beginning"),
//   - segment 2  (null parent only; the parent's child link is left alone).
void TopSegmentIsGapTest::createCallBack(Alignment *alignment) {
    const size_t numSequences = 3;
    vector<Sequence::Info> seqVec(numSequences);

    Genome *parent1 = alignment->addRootGenome("parent1");
    Genome *child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Same dimensions for both genomes: Sequence0 .. Sequence2.
    for (size_t seq = 0; seq < numSequences; ++seq) {
        seqVec[seq] = Sequence::Info("Sequence" + std::to_string(seq), 10, 5, 5);
    }
    parent1->setDimensions(seqVec);
    child1->setDimensions(seqVec);

    // Parent bottom segment k covers [2k, 2k + 2) and points at child
    // segment k with the pair's bool set to false.
    BottomSegmentStruct bottomSpec;
    for (BottomSegmentIteratorPtr bottomIt = parent1->getBottomSegmentIterator(); !bottomIt->atEnd();
         bottomIt->toRight()) {
        const auto index = bottomIt->getBottomSegment()->getArrayIndex();
        bottomSpec.set(index * 2, 2);
        bottomSpec._children.clear();
        bottomSpec._children.push_back(pair<hal_size_t, bool>(index, false));
        bottomSpec.applyTo(bottomIt);
    }

    // Child top segment k covers [2k, 2k + 2) and points back at parent
    // segment k.
    TopSegmentStruct topSpec;
    for (TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator(); !topIt->atEnd(); topIt->toRight()) {
        const auto index = topIt->getTopSegment()->getArrayIndex();
        topSpec.set(index * 2, 2, index);
        topSpec.applyTo(topIt);
    }

    // Insertion in the middle: top segment 8 loses its parent, parent
    // segment 8 is redirected to top segment 9, and top segment 9 is pointed
    // back at parent segment 8.
    BottomSegmentIteratorPtr bi = parent1->getBottomSegmentIterator(8);
    TopSegmentIteratorPtr ti = child1->getTopSegmentIterator(8);
    assert(bi->getBottomSegment()->getChildIndex(0) == 8 && ti->getTopSegment()->getParentIndex() == 8);
    bi->getBottomSegment()->setChildIndex(0, 9);
    ti->getTopSegment()->setParentIndex(NULL_INDEX);
    ti->toRight();
    ti->getTopSegment()->setParentIndex(8);

    // Insertion at the beginning: the same rewiring for top segment 10.
    bi = parent1->getBottomSegmentIterator(10);
    ti = child1->getTopSegmentIterator(10);
    assert(bi->getBottomSegment()->getChildIndex(0) == 10 && ti->getTopSegment()->getParentIndex() == 10);
    bi->getBottomSegment()->setChildIndex(0, 11);
    ti->getTopSegment()->setParentIndex(NULL_INDEX);
    ti->toRight();
    ti->getTopSegment()->setParentIndex(10);

    // Just having a null parent is not enough for an insertion: top segment 2
    // is detached while parent segment 2 still lists it as a child.
    bi = parent1->getBottomSegmentIterator(2);
    ti = child1->getTopSegmentIterator(2);
    assert(bi->getBottomSegment()->getChildIndex(0) == 2 && ti->getTopSegment()->getParentIndex() == 2);
    ti->getTopSegment()->setParentIndex(NULL_INDEX);
}
// Builds the fixture for MappedSegmentMapDupeTest.
//
// One root genome ("parent", sequence "CCC") has a single bottom segment with
// two child slots, one per leaf genome. Each leaf holds "CCCTACGTG" split
// into three top segments of length 3:
//   - child1: all three segments point at parent segment 0 with the fourth
//     set() argument true, and their last set() argument chains them
//     0 -> 1 -> 2 -> 0 (presumably the next-paralogy index; confirm against
//     TopSegmentStruct::set).
//   - child2: only the first segment points at parent segment 0; the other
//     two have NULL_INDEX as their parent.
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment) {
    // (child segment index, flag) entry stored in BottomSegmentStruct::_children.
    typedef pair<hal_size_t, bool> ChildLink;

    Genome *parent = alignment->addRootGenome("parent");
    Genome *child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome *child2 = alignment->addLeafGenome("child2", "parent", 1);

    vector<Sequence::Info> parentDims(1, Sequence::Info("Sequence", 3, 0, 1));
    parent->setDimensions(parentDims);
    vector<Sequence::Info> childDims(1, Sequence::Info("Sequence", 9, 3, 0));
    child1->setDimensions(childDims);
    child2->setDimensions(childDims);

    parent->setString("CCC");
    child1->setString("CCCTACGTG");
    child2->setString("CCCTACGTG");

    // The lone parent bottom segment: slot 0 -> child1 segment 0 (flag true),
    // slot 1 -> child2 segment 0 (flag false).
    BottomSegmentStruct bottomSpec;
    BottomSegmentIteratorPtr bottomIt = parent->getBottomSegmentIterator();
    bottomSpec.set(0, 3);
    bottomSpec._children.push_back(ChildLink(0, true));
    bottomSpec._children.push_back(ChildLink(0, false));
    bottomSpec.applyTo(bottomIt);

    // child1: segments at 0, 3, 6, all with parent index 0; the last argument
    // runs 1, 2, 0.
    TopSegmentStruct topSpec;
    TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
    for (int k = 0; k < 3; ++k) {
        if (k > 0) {
            topIt->toRight();
        }
        topSpec.set(k * 3, 3, 0, true, NULL_INDEX, (k + 1) % 3);
        topSpec.applyTo(topIt);
    }

    // child2: one aligned segment followed by two parentless ones.
    topIt = child2->getTopSegmentIterator();
    topSpec.set(0, 3, 0, false);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(3, 3, NULL_INDEX, true);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(6, 3, NULL_INDEX, false);
    topSpec.applyTo(topIt);
}
// Builds the fixture for TopSegmentIteratorReverseTest.
//
// Simple case with a single edge from a parent to a child, and that edge is
// reversed: parent1's first bottom segment spans all 10 bases and links to
// child1's first top segment with the reversed flag set on both ends. child1
// additionally gets two bottom segments of length 5 with no children.
void TopSegmentIteratorReverseTest::createCallBack(AlignmentPtr alignment) {
    Genome *parent1 = alignment->addRootGenome("parent1");
    Genome *child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Both genomes use identical dimensions and identical DNA.
    vector<Sequence::Info> dims(1, Sequence::Info("Sequence", 10, 2, 2));
    parent1->setDimensions(dims);
    child1->setDimensions(dims);

    parent1->setString("CCCTACGTGC");
    child1->setString("CCCTACGTGC");

    // Parent side of the reversed edge.
    BottomSegmentStruct bottomSpec;
    BottomSegmentIteratorPtr bottomIt = parent1->getBottomSegmentIterator();
    bottomSpec.set(0, 10, 0);
    bottomSpec._children.push_back(pair<hal_size_t, bool>(0, true));
    bottomSpec.applyTo(bottomIt);

    // Child side of the reversed edge.
    TopSegmentStruct topSpec;
    TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
    topSpec.set(0, 10, 0, true, 0);
    topSpec.applyTo(topIt);

    // child1's own bottom segments: [0, 5) and [5, 10), no children.
    bottomIt = child1->getBottomSegmentIterator();
    bottomSpec.set(0, 5, 0);
    bottomSpec._children.clear();
    bottomSpec.applyTo(bottomIt);
    bottomIt->toRight();
    bottomSpec.set(5, 5, 0);
    bottomSpec.applyTo(bottomIt);
}
// Builds the fixture for TopSegmentIteratorParseTest.
//
// Four independent root genomes ("case1" .. "case4") are created, each with a
// different overlap pattern between its top and bottom segments:
//   case 1: bottom segment aligns perfectly with top segment
//   case 2: bottom segment is completely contained in top segment
//   case 3: top segment is completely contained in bottom segment
//   case 4: top segment overhangs bottom segment on the left
void TopSegmentIteratorParseTest::createCallBack(AlignmentPtr alignment) {
    vector<Sequence::Info> dims(1);
    BottomSegmentStruct bottomSpec;
    TopSegmentStruct topSpec;

    // The 3 / 4 / 3 split used for the bottom segments of case 2 and the top
    // segments of case 3.
    const int splitStart[] = {0, 3, 7};
    const int splitLength[] = {3, 4, 3};

    // case 1: one top segment [0, 10) over one bottom segment [0, 10).
    {
        Genome *case1 = alignment->addRootGenome("case1");
        dims[0] = Sequence::Info("Sequence", 10, 2, 2);
        case1->setDimensions(dims);

        TopSegmentIteratorPtr topIt = case1->getTopSegmentIterator();
        topSpec.set(0, 10, NULL_INDEX, false, 0, NULL_INDEX);
        topSpec.applyTo(topIt);

        BottomSegmentIteratorPtr bottomIt = case1->getBottomSegmentIterator();
        bottomSpec.set(0, 10, 0);
        bottomSpec.applyTo(bottomIt);
    }

    // case 2: one top segment [0, 9) over three bottom segments.
    {
        Genome *case2 = alignment->addRootGenome("case2");
        dims[0] = Sequence::Info("Sequence", 10, 2, 3);
        case2->setDimensions(dims);

        TopSegmentIteratorPtr topIt = case2->getTopSegmentIterator();
        topSpec.set(0, 9, NULL_INDEX, false, 0, NULL_INDEX);
        topSpec.applyTo(topIt);

        BottomSegmentIteratorPtr bottomIt = case2->getBottomSegmentIterator();
        for (int k = 0; k < 3; ++k) {
            if (k > 0) {
                bottomIt->toRight();
            }
            bottomSpec.set(splitStart[k], splitLength[k], 0);
            bottomSpec.applyTo(bottomIt);
        }
    }

    // case 3: three top segments over one bottom segment [0, 9).
    {
        Genome *case3 = alignment->addRootGenome("case3");
        dims[0] = Sequence::Info("Sequence", 10, 3, 2);
        case3->setDimensions(dims);

        TopSegmentIteratorPtr topIt = case3->getTopSegmentIterator();
        for (int k = 0; k < 3; ++k) {
            if (k > 0) {
                topIt->toRight();
            }
            topSpec.set(splitStart[k], splitLength[k], NULL_INDEX, false, 0);
            topSpec.applyTo(topIt);
        }

        BottomSegmentIteratorPtr bottomIt = case3->getBottomSegmentIterator();
        bottomSpec.set(0, 9, 0);
        bottomSpec.applyTo(bottomIt);
    }

    // case 4: one top segment [0, 9) over bottom segments [0, 5) and [5, 10).
    {
        Genome *case4 = alignment->addRootGenome("case4");
        dims[0] = Sequence::Info("Sequence", 10, 2, 2);
        case4->setDimensions(dims);

        TopSegmentIteratorPtr topIt = case4->getTopSegmentIterator();
        topSpec.set(0, 9, NULL_INDEX, false, 0);
        topSpec.applyTo(topIt);

        BottomSegmentIteratorPtr bottomIt = case4->getBottomSegmentIterator();
        bottomSpec.set(0, 5, 0);
        bottomSpec.applyTo(bottomIt);
        bottomIt->toRight();
        bottomSpec.set(5, 5, 0);
        bottomSpec.applyTo(bottomIt);
    }
}
// Builds the fixture for MappedSegmentMapExtraParalogsTest.
//
// Tree: root -> parent -> {grandChild1, grandChild2}.
//
// The scenario is one where all the segments of grandChild1 coalesce with the
// first segment of grandChild2, but only when the root is used as the
// coalescence limit; otherwise only the first segments map to each other.
// To get there, the three top segments of "parent" all point at the single
// root bottom segment and are chained 0 -> 1 -> 2 -> 0 through the last set()
// argument, while each parent bottom segment k links to grandChild1 segment k
// and only parent bottom segment 0 links to grandChild2.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment) {
    // (child segment index, flag) entry stored in BottomSegmentStruct::_children.
    typedef pair<hal_size_t, bool> ChildLink;

    Genome *root = alignment->addRootGenome("root");
    Genome *parent = alignment->addLeafGenome("parent", "root", 1);
    Genome *grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
    Genome *grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);

    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 3, 0, 1);
    root->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 3);
    parent->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 0);
    grandChild1->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 0);
    grandChild2->setDimensions(dims);

    root->setString("CCC");
    parent->setString("CCCTACGTG");
    grandChild1->setString("CCCTACGTG");
    grandChild2->setString("CCCTACGTG");

    // root: a single bottom segment linked to parent top segment 0.
    BottomSegmentStruct bottomSpec;
    BottomSegmentIteratorPtr bottomIt = root->getBottomSegmentIterator();
    bottomSpec.set(0, 3);
    bottomSpec._children.push_back(ChildLink(0, false));
    bottomSpec.applyTo(bottomIt);

    // parent top segments: all three map to root segment 0; the last argument
    // runs 1, 2, 0.
    TopSegmentStruct topSpec;
    TopSegmentIteratorPtr topIt = parent->getTopSegmentIterator();
    for (int k = 0; k < 3; ++k) {
        if (k > 0) {
            topIt->toRight();
        }
        topSpec.set(k * 3, 3, 0, false, NULL_INDEX, (k + 1) % 3);
        topSpec.applyTo(topIt);
    }

    // parent bottom segments: slot 0 is grandChild1, slot 1 is grandChild2.
    bottomIt = parent->getBottomSegmentIterator();
    bottomSpec.set(0, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(0, true));
    bottomSpec._children.push_back(ChildLink(0, false));
    bottomSpec.applyTo(bottomIt);

    bottomIt->toRight();
    bottomSpec.set(3, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(1, true));
    bottomSpec._children.push_back(ChildLink(NULL_INDEX, true));
    bottomSpec.applyTo(bottomIt);

    bottomIt->toRight();
    bottomSpec.set(6, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(2, true));
    bottomSpec._children.push_back(ChildLink(NULL_INDEX, false));
    bottomSpec.applyTo(bottomIt);

    // grandChild1: segment k maps to parent segment k, fourth argument true.
    topIt = grandChild1->getTopSegmentIterator();
    for (int k = 0; k < 3; ++k) {
        if (k > 0) {
            topIt->toRight();
        }
        topSpec.set(k * 3, 3, k, true);
        topSpec.applyTo(topIt);
    }

    // grandChild2: only the first segment has a parent.
    topIt = grandChild2->getTopSegmentIterator();
    topSpec.set(0, 3, 0, false);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(3, 3, NULL_INDEX, true);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(6, 3, NULL_INDEX, false);
    topSpec.applyTo(topIt);

    // "parent" has both top and bottom segments; its parse info is fixed up
    // last, after all segments are in place.
    parent->fixParseInfo();
}
// Builds the fixture for MappedSegmentMapUpTest.
//
// Tree layout:
//   parent -> child1 -> gi1 -> gi2 -> gi3 -> gi4 -> gi5
//   parent -> child2 -> g1  -> g2  -> g3  -> g4  -> g5
//
// The parent's single 12-base bottom segment maps reversed onto child1 and
// non-reversed onto child2. Below child2 hangs a chain (g1..g5) with no
// rearrangements, to test simple parsing; below child1 hangs a chain
// (gi1..gi5) in which each segment is marked inverted whenever
// rand() % 4 == 0.
//
// Every genome in a chain is wired segment-for-segment to its parent: top
// segment j of the genome pairs with bottom segment j of the parent.
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment) {
    vector<Sequence::Info> seqVec(1);
    BottomSegmentIteratorPtr bi;
    BottomSegmentStruct bs;
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;

    // Edge from parent to child1 is reversed, edge to child2 is not.
    Genome *parent = alignment->addRootGenome("parent");
    Genome *child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome *child2 = alignment->addLeafGenome("child2", "parent", 1);
    // A bunch of descendants with no rearrangements to test simple parsing.
    Genome *g1 = alignment->addLeafGenome("g1", "child2", 1);
    Genome *g2 = alignment->addLeafGenome("g2", "g1", 1);
    Genome *g3 = alignment->addLeafGenome("g3", "g2", 1);
    Genome *g4 = alignment->addLeafGenome("g4", "g3", 1);
    Genome *g5 = alignment->addLeafGenome("g5", "g4", 1);
    // ... and some with random inversions.
    Genome *gi1 = alignment->addLeafGenome("gi1", "child1", 1);
    Genome *gi2 = alignment->addLeafGenome("gi2", "gi1", 1);
    Genome *gi3 = alignment->addLeafGenome("gi3", "gi2", 1);
    Genome *gi4 = alignment->addLeafGenome("gi4", "gi3", 1);
    Genome *gi5 = alignment->addLeafGenome("gi5", "gi4", 1);

    const size_t chainLength = 5;
    Genome *gs[] = {g1, g2, g3, g4, g5};
    Genome *gis[] = {gi1, gi2, gi3, gi4, gi5};

    seqVec[0] = Sequence::Info("Sequence", 12, 0, 1);
    parent->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
    child1->setDimensions(seqVec);
    seqVec[0] = Sequence::Info("Sequence", 12, 1, 6);
    child2->setDimensions(seqVec);

    // Third / fourth Sequence::Info arguments for chain position i. Both
    // chains use the same values; each entry's third argument equals the
    // previous entry's fourth.
    const int chainTopCount[] = {6, 4, 3, 2, 12};
    const int chainBottomCount[] = {4, 3, 2, 12, 0};
    for (size_t i = 0; i < chainLength; ++i) {
        seqVec[0] = Sequence::Info("Sequence", 12, chainTopCount[i], chainBottomCount[i]);
        gs[i]->setDimensions(seqVec);
    }
    for (size_t i = 0; i < chainLength; ++i) {
        seqVec[0] = Sequence::Info("Sequence", 12, chainTopCount[i], chainBottomCount[i]);
        gis[i]->setDimensions(seqVec);
    }

    parent->setString("CCCTACTTGTGC");
    child1->setString("CCCTACTTGTGC");
    child2->setString("CCCTACTTGTGC");
    for (size_t i = 0; i < chainLength; ++i) {
        gs[i]->setString("TCCTACTTGTGC");
        gis[i]->setString("TCCTACTTGTGC");
    }

    // Root edge: slot 0 -> child1 (reversed), slot 1 -> child2 (not reversed).
    bi = parent->getBottomSegmentIterator();
    bs.set(0, 12);
    bs._children.push_back(pair<hal_size_t, bool>(0, true));
    bs._children.push_back(pair<hal_size_t, bool>(0, false));
    bs.applyTo(bi);

    ti = child1->getTopSegmentIterator();
    ts.set(0, 12, 0, true, 0);
    ts.applyTo(ti);

    ti = child2->getTopSegmentIterator();
    ts.set(0, 12, 0, false, 0);
    ts.applyTo(ti);

    // Pairs the six length-2 bottom segments of `upper` one-to-one, not
    // reversed, with the six top segments of `lower`. The shared bi/bs/ti/ts
    // objects are captured by reference so their state evolves exactly as it
    // would with the loop written inline.
    auto pairSixSegments = [&](Genome *upper, Genome *lower) {
        for (size_t i = 0; i < 6; ++i) {
            bi = upper->getBottomSegmentIterator(i);
            bs.set(i * 2, 2, 0);
            bs._children.clear();
            bs._children.push_back(pair<hal_size_t, bool>(i, false));
            bs.applyTo(bi);

            ti = lower->getTopSegmentIterator(i);
            ts.set(i * 2, 2, i, false);
            ts.applyTo(ti);
        }
    };
    pairSixSegments(child2, g1);
    pairSixSegments(child1, gi1);

    // Wires every genome of `chain` to its parent. For each top segment j of
    // a genome, bottom segment j of the parent is set to the same coordinates
    // and linked to j, and the top segment is linked back to j. Parse indices
    // are derived from the uniform segment lengths of the neighbouring
    // genomes; the last genome in the chain has no child and therefore gets
    // NULL_INDEX as its bottom parse index.
    //
    // With randomInversions set, each segment is inverted when
    // rand() % 4 == 0. rand() is not called at all otherwise, so the random
    // sequence consumed is the same as if the two chains were handled by
    // separate loops.
    auto linkChain = [&](Genome *const *chain, bool randomInversions) {
        for (size_t i = 0; i < chainLength; ++i) {
            const Genome *g = chain[i];
            // Named `upper`/`lower` to avoid shadowing the root `parent`.
            const Genome *upper = g->getParent();
            const Genome *lower = (i + 1 == chainLength) ? nullptr : g->getChild(0);

            hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
            hal_size_t upperSegLen = upper->getSequenceLength() / upper->getNumTopSegments();
            hal_size_t lowerSegLen = 0;
            if (lower != nullptr) {
                lowerSegLen = lower->getSequenceLength() / lower->getNumTopSegments();
            }

            for (size_t j = 0; j < g->getNumTopSegments(); ++j) {
                const bool inv = randomInversions && (rand() % 4 == 0);

                bi = upper->getBottomSegmentIterator(j);
                bs.set(j * segLen, segLen, (j * segLen) / upperSegLen);
                bs._children.clear();
                bs._children.push_back(pair<hal_size_t, bool>(j, inv));
                bs.applyTo(bi);

                hal_index_t bparse = NULL_INDEX;
                if (lower != nullptr) {
                    bparse = (j * segLen) / lowerSegLen;
                }
                ti = g->getTopSegmentIterator(j);
                ts.set(j * segLen, segLen, j, inv, bparse);
                ts.applyTo(ti);
            }
        }
    };
    linkChain(gs, false);
    linkChain(gis, true);
}