/**
 * Populate the alignment used by the duplication mapping test.
 *
 * A root genome "parent" (sequence "CCC") gets two leaves, "child1" and
 * "child2" (sequence "CCCTACGTG" each).  The parent's only bottom segment
 * lists one child entry per leaf; the leaves' top segments are then written
 * one by one.
 *
 * NOTE(review): the trailing argument of the six-argument ts.set() calls is
 * presumably the next-paralogy index (1 -> 2 -> 0 forms a cycle) -- confirm
 * against TopSegmentStruct::set.
 */
void MappedSegmentMapDupeTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;

    // Tree topology: one root with two leaves.
    Genome* parent = alignment->addRootGenome("parent");
    Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);

    // Both leaves share the same dimensions, so one vector serves both.
    vector<Sequence::Info> rootDims(1, Sequence::Info("Sequence", 3, 0, 1));
    vector<Sequence::Info> leafDims(1, Sequence::Info("Sequence", 9, 3, 0));
    parent->setDimensions(rootDims);
    child1->setDimensions(leafDims);
    child2->setDimensions(leafDims);

    parent->setString("CCC");
    child1->setString("CCCTACGTG");
    child2->setString("CCCTACGTG");

    // Single bottom segment in the parent with an entry for each child.
    BottomSegmentStruct bottomSeg;
    bottomSeg.set(0, 3);
    bottomSeg._children.push_back(ChildLink(0, true));
    bottomSeg._children.push_back(ChildLink(0, false));
    BottomSegmentIteratorPtr bottomIt = parent->getBottomSegmentIterator();
    bottomSeg.applyTo(bottomIt);

    // child1: three segments of length 3, all pointing at parent segment 0;
    // the last argument cycles 1, 2, 0.
    const int numLeafSegments = 3;
    TopSegmentStruct topSeg;
    TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
    for (int seg = 0; seg < numLeafSegments; ++seg) {
        if (seg > 0) {
            topIt->toRight();
        }
        topSeg.set(seg * 3, 3, 0, true, NULL_INDEX, (seg + 1) % numLeafSegments);
        topSeg.applyTo(topIt);
    }

    // child2: only the first segment has a parent; the other two do not.
    topIt = child2->getTopSegmentIterator();
    topSeg.set(0, 3, 0, false);
    topSeg.applyTo(topIt);

    topIt->toRight();
    topSeg.set(3, 3, NULL_INDEX, true);
    topSeg.applyTo(topIt);

    topIt->toRight();
    topSeg.set(6, 3, NULL_INDEX, false);
    topSeg.applyTo(topIt);
}
/**
 * Create the genomes checked by the genome-creation test.
 *
 * Asserts that the alignment starts out empty, then adds a root
 * ("AncGenome") with three leaves, stores one metadata pair on the root and
 * assigns every genome a single sequence called "Sequence".
 */
void GenomeCreateTest::createCallBack(AlignmentPtr alignment) {
    // A freshly created alignment must not contain any genome yet.
    CuAssertTrue(_testCase, alignment->getNumGenomes() == 0);

    Genome* ancGenome = alignment->addRootGenome("AncGenome", 0);

    // Per-leaf parameters.  Branch lengths stay literal so the stored doubles
    // are exactly the ones written here.
    const size_t numLeaves = 3;
    const char* const leafNames[] = {"Leaf1", "Leaf2", "Leaf3"};
    const double leafBranchLengths[] = {0.1, 0.2, 0.3};
    const hal_size_t leafSequenceLengths[] = {1000000, 2000000, 3000000};

    Genome* leafGenomes[numLeaves];
    for (size_t leaf = 0; leaf < numLeaves; ++leaf) {
        leafGenomes[leaf] =
            alignment->addLeafGenome(leafNames[leaf], "AncGenome", leafBranchLengths[leaf]);
    }

    ancGenome->getMetaData()->set("Young", "Jeezy");

    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 1000000, 5000, 700000);
    ancGenome->setDimensions(dims);

    for (size_t leaf = 0; leaf < numLeaves; ++leaf) {
        dims[0] = Sequence::Info("Sequence", leafSequenceLengths[leaf], 700000, 0);
        leafGenomes[leaf]->setDimensions(dims);
    }
}
/**
 * Build the two-genome alignment used by the reversed top-segment iterator
 * test.
 *
 * "parent1" and "child1" both carry the sequence "CCCTACGTGC".  The parent's
 * first bottom segment spans all 10 bases and lists child segment 0 with its
 * boolean flag set to true; child1's first top segment mirrors that edge.
 * child1's two bottom segments (5 bases each) are written without children.
 */
void TopSegmentIteratorReverseTest::createCallBack(AlignmentPtr alignment) {
    Genome* parent1 = alignment->addRootGenome("parent1");
    Genome* child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Identical dimensions for both genomes.
    vector<Sequence::Info> dims(1, Sequence::Info("Sequence", 10, 2, 2));
    parent1->setDimensions(dims);
    child1->setDimensions(dims);

    parent1->setString("CCCTACGTGC");
    child1->setString("CCCTACGTGC");

    // Parent side of the reversed edge.
    BottomSegmentStruct bottomSeg;
    BottomSegmentIteratorPtr bottomIt = parent1->getBottomSegmentIterator();
    bottomSeg.set(0, 10, 0);
    bottomSeg._children.push_back(pair<hal_size_t, bool>(0, true));
    bottomSeg.applyTo(bottomIt);

    // Child side of the same edge.
    TopSegmentStruct topSeg;
    TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
    topSeg.set(0, 10, 0, true, 0);
    topSeg.applyTo(topIt);

    // child1 is a leaf: its bottom segments are written with no children.
    bottomIt = child1->getBottomSegmentIterator();
    bottomSeg.set(0, 5, 0);
    bottomSeg._children.clear();
    bottomSeg.applyTo(bottomIt);

    bottomIt->toRight();
    bottomSeg.set(5, 5, 0);
    bottomSeg.applyTo(bottomIt);
}
/**
 * Create a root genome with nine leaves and fill the root's top segments
 * with randomised values.
 *
 * The generated segments are remembered in _topSegments.  Each gets the same
 * length (sequence length divided by the number of top segments) and a start
 * position of index * length.  They are then written to the genome in array
 * order while asserting that the iterator visits indices 0, 1, 2, ...
 */
void TopSegmentSimpleIteratorTest::createCallBack(AlignmentPtr alignment) {
    Genome* ancGenome = alignment->addRootGenome("Anc0", 0);

    const size_t numChildren = 9;
    for (size_t leaf = 0; leaf < numChildren; ++leaf) {
        std::stringstream leafName;
        leafName << "Leaf" << leaf;
        alignment->addLeafGenome(leafName.str(), "Anc0", 0.1);
    }

    vector<Sequence::Info> dims(1, Sequence::Info("Sequence", 1000000, 5000, 10000));
    ancGenome->setDimensions(dims);
    CuAssertTrue(_testCase, ancGenome->getNumChildren() == numChildren);

    // Generate the expected segments first ...
    _topSegments.clear();
    size_t segIndex = 0;
    while (segIndex < ancGenome->getNumTopSegments()) {
        TopSegmentStruct generated;
        generated.setRandom();
        // Overwrite the random coordinates so the segments tile the sequence.
        generated._length = ancGenome->getSequenceLength() / ancGenome->getNumTopSegments();
        generated._startPosition = segIndex * generated._length;
        _topSegments.push_back(generated);
        ++segIndex;
    }

    // ... then write them to the genome, checking the iteration order.
    TopSegmentIteratorPtr cursor = ancGenome->getTopSegmentIterator(0);
    TopSegmentIteratorConstPtr stop = ancGenome->getTopSegmentEndIterator();
    size_t expectedIndex = 0;
    while (cursor != stop) {
        CuAssertTrue(_testCase,
                     static_cast<size_t>(cursor->getTopSegment()->getArrayIndex()) == expectedIndex);
        _topSegments[expectedIndex].applyTo(cursor);
        cursor->toRight();
        ++expectedIndex;
    }
}
int main(int argc, char *argv[]) { CLParserPtr optParser = initParser(); string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName, childName, leafName; double upperBranchLength, leafBranchLength; bool noMarkAncestors; try { optParser->parseOptions(argc, argv); inPath = optParser->getArgument<string>("inFile"); botAlignmentPath = optParser->getArgument<string>("botAlignmentFile"); topAlignmentPath = optParser->getArgument<string>("topAlignmentFile"); parentName = optParser->getArgument<string>("parentName"); insertName = optParser->getArgument<string>("insertName"); childName = optParser->getArgument<string>("childName"); leafName = optParser->getArgument<string>("leafName"); upperBranchLength = optParser->getArgument<double>("upperBranchLength"); leafBranchLength = optParser->getArgument<double>("leafBranchLength"); noMarkAncestors = optParser->getFlag("noMarkAncestors"); } catch (exception &e) { optParser->printUsage(cerr); return 1; } AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser); AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath, optParser); AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath, optParser); mainAlignment->insertGenome(insertName, parentName, childName, upperBranchLength); mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength); // Insert the new intermediate node. 
Genome *insertGenome = mainAlignment->openGenome(insertName); const Genome *topInsertGenome = topAlignment->openGenome(insertName); const Genome *botInsertGenome = botAlignment->openGenome(insertName); topInsertGenome->copyDimensions(insertGenome); topInsertGenome->copyTopDimensions(insertGenome); botInsertGenome->copyBottomDimensions(insertGenome); topInsertGenome->copySequence(insertGenome); topInsertGenome->copyTopSegments(insertGenome); topInsertGenome->copyMetadata(insertGenome); botInsertGenome->copyBottomSegments(insertGenome); insertGenome->fixParseInfo(); // Copy the bottom segments for the parent genome from the top alignment. Genome *parentGenome = mainAlignment->openGenome(parentName); const Genome *botParentGenome = topAlignment->openGenome(parentName); botParentGenome->copyBottomDimensions(parentGenome); botParentGenome->copyBottomSegments(parentGenome); parentGenome->fixParseInfo(); // Fix the parent's other children as well. vector<string> allChildren = mainAlignment->getChildNames(parentName); for (size_t i = 0; i < allChildren.size(); i++) { if (allChildren[i] != insertName) { Genome *outGenome = mainAlignment->openGenome(allChildren[i]); const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]); topSegmentsGenome->copyTopDimensions(outGenome); topSegmentsGenome->copyTopSegments(outGenome); outGenome->fixParseInfo(); } } // Copy the top segments for the child genome from the bottom alignment. Genome *childGenome = mainAlignment->openGenome(childName); const Genome *topChildGenome = botAlignment->openGenome(childName); topChildGenome->copyTopDimensions(childGenome); topChildGenome->copyTopSegments(childGenome); childGenome->fixParseInfo(); // Copy the entire genome for the leaf from the bottom alignment. 
Genome *outLeafGenome = mainAlignment->openGenome(leafName); const Genome *inLeafGenome = botAlignment->openGenome(leafName); inLeafGenome->copy(outLeafGenome); if (!noMarkAncestors) { markAncestorsForUpdate(mainAlignment, insertName); } mainAlignment->close(); botAlignment->close(); topAlignment->close(); }
/**
 * Build the alignment for the top-segment gap test.
 *
 * Two genomes, each with three sequences ("Sequence0".."Sequence2") of ten
 * bases split into five segments of length two.  Initially segment i of the
 * parent is aligned with segment i of the child; afterwards a few edges are
 * rewired:
 *   - child segments 8 and 10 lose their parent, and the parent segment of
 *     the same index is pointed at the following child segment instead;
 *   - child segment 2 merely loses its parent while the parent segment still
 *     points at it.
 */
void TopSegmentIsGapTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;
    const size_t numSequences = 3;

    Genome* parent1 = alignment->addRootGenome("parent1");
    Genome* child1 = alignment->addLeafGenome("child1", "parent1", 1);

    // Same layout for both genomes: 3 sequences x 5 segments x 2 bases.
    vector<Sequence::Info> dims(numSequences);
    for (size_t seq = 0; seq < numSequences; ++seq) {
        stringstream label;
        label << "Sequence" << seq;
        dims[seq] = Sequence::Info(label.str(), 10, 5, 5);
    }
    parent1->setDimensions(dims);
    child1->setDimensions(dims);

    // Parent segment i -> child segment i.
    BottomSegmentStruct bottomSeg;
    BottomSegmentIteratorPtr bottomIt = parent1->getBottomSegmentIterator();
    while (bottomIt != parent1->getBottomSegmentEndIterator()) {
        bottomSeg.set(bottomIt->getBottomSegment()->getArrayIndex() * 2, 2);
        bottomSeg._children.clear();
        bottomSeg._children.push_back(
            ChildLink(bottomIt->getBottomSegment()->getArrayIndex(), false));
        bottomSeg.applyTo(bottomIt);
        bottomIt->toRight();
    }

    // Child segment i -> parent segment i.
    TopSegmentStruct topSeg;
    TopSegmentIteratorPtr topIt = child1->getTopSegmentIterator();
    while (topIt != child1->getTopSegmentEndIterator()) {
        topSeg.set(topIt->getTopSegment()->getArrayIndex() * 2, 2,
                   topIt->getTopSegment()->getArrayIndex());
        topSeg.applyTo(topIt);
        topIt->toRight();
    }

    // Insertions: one in the middle of a sequence (segment 8) and one at the
    // beginning of a sequence (segment 10).  The child segment is detached and
    // the parent segment is re-linked to the child segment right after it.
    const int insertionSites[] = {8, 10};
    for (size_t k = 0; k < 2; ++k) {
        const int site = insertionSites[k];
        bottomIt = parent1->getBottomSegmentIterator(site);
        topIt = child1->getTopSegmentIterator(site);
        assert(bottomIt->getBottomSegment()->getChildIndex(0) == site &&
               topIt->getTopSegment()->getParentIndex() == site);
        bottomIt->getBottomSegment()->setChildIndex(0, site + 1);
        topIt->getTopSegment()->setParentIndex(NULL_INDEX);
        topIt->toRight();
        topIt->getTopSegment()->setParentIndex(site);
    }

    // just having a null parent is not enough for an insertion
    const int detachedOnly = 2;
    bottomIt = parent1->getBottomSegmentIterator(detachedOnly);
    topIt = child1->getTopSegmentIterator(detachedOnly);
    assert(bottomIt->getBottomSegment()->getChildIndex(0) == detachedOnly &&
           topIt->getTopSegment()->getParentIndex() == detachedOnly);
    topIt->getTopSegment()->setParentIndex(NULL_INDEX);
}
/**
 * Build the three-level alignment for the extra-paralogs mapping test.
 *
 * Set up a case where all the segments of grandChild1 coalesce with the
 * first segment of grandChild2, but only if using the root as the
 * coalescence limit.  Otherwise only the first segments map to each other.
 *
 * Layout: root (3 bases) -> parent (9 bases, 3 top / 3 bottom segments) ->
 * grandChild1 and grandChild2 (9 bases, 3 top segments each).
 *
 * NOTE(review): the last argument of the six-argument ts.set() calls is
 * presumably the next-paralogy index (1 -> 2 -> 0) -- confirm against
 * TopSegmentStruct::set.
 */
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;
    const int numSegments = 3;

    Genome* root = alignment->addRootGenome("root");
    Genome* parent = alignment->addLeafGenome("parent", "root", 1);
    Genome* grandChild1 = alignment->addLeafGenome("grandChild1", "parent", 1);
    Genome* grandChild2 = alignment->addLeafGenome("grandChild2", "parent", 1);

    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 3, 0, 1);
    root->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 3);
    parent->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 0);
    grandChild1->setDimensions(dims);
    grandChild2->setDimensions(dims);

    root->setString("CCC");
    parent->setString("CCCTACGTG");
    grandChild1->setString("CCCTACGTG");
    grandChild2->setString("CCCTACGTG");

    // root -> parent: a single bottom segment with one child entry.
    BottomSegmentStruct bottomSeg;
    BottomSegmentIteratorPtr bottomIt = root->getBottomSegmentIterator();
    bottomSeg.set(0, 3);
    bottomSeg._children.push_back(ChildLink(0, false));
    bottomSeg.applyTo(bottomIt);

    // parent top segments: all three point at root segment 0; the last
    // argument cycles 1, 2, 0.
    TopSegmentStruct topSeg;
    TopSegmentIteratorPtr topIt = parent->getTopSegmentIterator();
    for (int seg = 0; seg < numSegments; ++seg) {
        if (seg > 0) {
            topIt->toRight();
        }
        topSeg.set(seg * 3, 3, 0, false, NULL_INDEX, (seg + 1) % numSegments);
        topSeg.applyTo(topIt);
    }

    // How parent segment k relates to grandChild2 (used for both directions
    // of the edge): only segment 0 is aligned, the rest have no partner.
    const hal_index_t gc2Partner[] = {0, NULL_INDEX, NULL_INDEX};
    const bool gc2Reversed[] = {false, true, false};

    // parent bottom segments: first child entry -> grandChild1 segment k
    // (flag true), second child entry -> grandChild2 per the table above.
    bottomIt = parent->getBottomSegmentIterator();
    for (int seg = 0; seg < numSegments; ++seg) {
        if (seg > 0) {
            bottomIt->toRight();
        }
        bottomSeg.set(seg * 3, 3);
        bottomSeg._children.clear();
        bottomSeg._children.push_back(ChildLink(seg, true));
        bottomSeg._children.push_back(ChildLink(gc2Partner[seg], gc2Reversed[seg]));
        bottomSeg.applyTo(bottomIt);
    }

    // grandChild1: segment k -> parent segment k, flag true.
    topIt = grandChild1->getTopSegmentIterator();
    for (int seg = 0; seg < numSegments; ++seg) {
        if (seg > 0) {
            topIt->toRight();
        }
        topSeg.set(seg * 3, 3, seg, true);
        topSeg.applyTo(topIt);
    }

    // grandChild2: mirror of the second child entries written above.
    topIt = grandChild2->getTopSegmentIterator();
    for (int seg = 0; seg < numSegments; ++seg) {
        if (seg > 0) {
            topIt->toRight();
        }
        topSeg.set(seg * 3, 3, gc2Partner[seg], gc2Reversed[seg]);
        topSeg.applyTo(topIt);
    }

    parent->fixParseInfo();
}
/**
 * Build the deep alignment used by the map-up test.
 *
 * Topology:
 *   parent -> child1 -> gi1 -> gi2 -> gi3 -> gi4 -> gi5
 *          -> child2 -> g1  -> g2  -> g3  -> g4  -> g5
 *
 * The parent->child1 edge has its flag set to true and parent->child2 has it
 * set to false.  The g* chain is wired with no inversions to exercise simple
 * parsing; the gi* chain gets a random inversion flag per segment
 * (rand() % 4 == 0).  Every genome has a single 12-base sequence; the number
 * of segments varies along each chain so the parse indices are non-trivial.
 */
void MappedSegmentMapUpTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink;
    const size_t chainLength = 5;

    Genome* parent = alignment->addRootGenome("parent");
    Genome* child1 = alignment->addLeafGenome("child1", "parent", 1);
    Genome* child2 = alignment->addLeafGenome("child2", "parent", 1);

    // Each chain entry hangs off the entry before it in its name table.
    // add a bunch of grandchildren with no rearrangements to test simple parsing
    const char* const plainNames[] = {"child2", "g1", "g2", "g3", "g4", "g5"};
    Genome* gs[chainLength];
    for (size_t k = 0; k < chainLength; ++k) {
        gs[k] = alignment->addLeafGenome(plainNames[k + 1], plainNames[k], 1);
    }
    // add some with random inversions
    const char* const invertedNames[] = {"child1", "gi1", "gi2", "gi3", "gi4", "gi5"};
    Genome* gis[chainLength];
    for (size_t k = 0; k < chainLength; ++k) {
        gis[k] = alignment->addLeafGenome(invertedNames[k + 1], invertedNames[k], 1);
    }

    // Dimensions.  Both chains use the same (top, bottom) segment counts per
    // level.
    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 12, 0, 1);
    parent->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 12, 1, 6);
    child1->setDimensions(dims);
    child2->setDimensions(dims);

    const hal_size_t chainTopCounts[] = {6, 4, 3, 2, 12};
    const hal_size_t chainBottomCounts[] = {4, 3, 2, 12, 0};
    for (size_t k = 0; k < chainLength; ++k) {
        dims[0] = Sequence::Info("Sequence", 12, chainTopCounts[k], chainBottomCounts[k]);
        gs[k]->setDimensions(dims);
    }
    for (size_t k = 0; k < chainLength; ++k) {
        dims[0] = Sequence::Info("Sequence", 12, chainTopCounts[k], chainBottomCounts[k]);
        gis[k]->setDimensions(dims);
    }

    parent->setString("CCCTACTTGTGC");
    child1->setString("CCCTACTTGTGC");
    child2->setString("CCCTACTTGTGC");
    for (size_t k = 0; k < chainLength; ++k) {
        gs[k]->setString("TCCTACTTGTGC");
        gis[k]->setString("TCCTACTTGTGC");
    }

    BottomSegmentIteratorPtr bi;
    BottomSegmentStruct bs;
    TopSegmentIteratorPtr ti;
    TopSegmentStruct ts;

    // parent -> {child1 (flag true), child2 (flag false)}
    bi = parent->getBottomSegmentIterator();
    bs.set(0, 12);
    bs._children.push_back(ChildLink(0, true));
    bs._children.push_back(ChildLink(0, false));
    bs.applyTo(bi);

    ti = child1->getTopSegmentIterator();
    ts.set(0, 12, 0, true, 0);
    ts.applyTo(ti);

    ti = child2->getTopSegmentIterator();
    ts.set(0, 12, 0, false, 0);
    ts.applyTo(ti);

    // First chain level: child2 -> g1, then child1 -> gi1.  Six segments of
    // length two, aligned index-to-index.
    Genome* const firstUpper[] = {child2, child1};
    Genome* const firstLower[] = {gs[0], gis[0]};
    for (size_t edge = 0; edge < 2; ++edge) {
        for (size_t i = 0; i < 6; ++i) {
            bi = firstUpper[edge]->getBottomSegmentIterator(i);
            bs.set(i * 2, 2, 0);
            bs._children.clear();
            bs._children.push_back(ChildLink(i, false));
            bs.applyTo(bi);

            ti = firstLower[edge]->getTopSegmentIterator(i);
            ts.set(i * 2, 2, i, false);
            ts.applyTo(ti);
        }
    }

    // Wire every genome of both chains to the genome above it.  The plain
    // chain is processed completely before the inverted chain, and rand() is
    // consulted only for the inverted chain, once per segment.
    Genome** const chains[] = {gs, gis};
    for (size_t c = 0; c < 2; ++c) {
        const bool randomInversions = (c == 1);
        for (size_t i = 0; i < chainLength; ++i) {
            const Genome* g = chains[c][i];
            const Genome* above = g->getParent();
            // The last genome of a chain has no child.
            const Genome* below = NULL;
            if (i != 4) {
                below = g->getChild(0);
            }

            const hal_size_t segLen = g->getSequenceLength() / g->getNumTopSegments();
            const hal_size_t aboveSegLen =
                above->getSequenceLength() / above->getNumTopSegments();
            hal_size_t belowSegLen = 0;
            if (below) {
                belowSegLen = below->getSequenceLength() / below->getNumTopSegments();
            }

            for (size_t j = 0; j < g->getNumTopSegments(); ++j) {
                const bool inv = randomInversions ? (rand() % 4 == 0) : false;

                // Bottom segment j of the genome above; its third argument is
                // the index of the top segment of that genome containing the
                // start position.
                bi = above->getBottomSegmentIterator(j);
                bs.set(j * segLen, segLen, (j * segLen) / aboveSegLen);
                bs._children.clear();
                bs._children.push_back(ChildLink(j, inv));
                bs.applyTo(bi);

                // Matching top segment j of g; the bottom parse index is only
                // defined when g has a child.
                hal_index_t bparse = NULL_INDEX;
                if (below != NULL) {
                    bparse = (j * segLen) / belowSegLen;
                }
                ti = g->getTopSegmentIterator(j);
                ts.set(j * segLen, segLen, j, inv, bparse);
                ts.applyTo(ti);
            }
        }
    }
}