/**
 * Transfer the "upward-facing" data around one genome from topAlignment into
 * mainAlignment.
 *
 * Three groups of genomes are touched, in this order:
 *   1. genomeName itself: top dimensions and top segments;
 *   2. its parent: bottom dimensions and bottom segments;
 *   3. every other child of that parent: top dimensions and top segments.
 * fixParseInfo() is called on each destination genome right after its copy.
 *
 * @param topAlignment  alignment the data is read from
 * @param mainAlignment alignment that is modified
 * @param genomeName    name of the genome being replaced; it is looked up by
 *                      this name in both alignments
 */
void copyFromTopAlignment(AlignmentConstPtr topAlignment, AlignmentPtr mainAlignment, const string &genomeName) {
    // 1. The replaced genome: top side only.
    Genome *dstGenome = mainAlignment->openGenome(genomeName);
    const Genome *srcGenome = topAlignment->openGenome(genomeName);
    srcGenome->copyTopDimensions(dstGenome);
    srcGenome->copyTopSegments(dstGenome);
    dstGenome->fixParseInfo();

    // 2. Its parent: bottom side only.
    Genome *dstParent = dstGenome->getParent();
    const Genome *srcParent = srcGenome->getParent();
    srcParent->copyBottomDimensions(dstParent);
    srcParent->copyBottomSegments(dstParent);
    dstParent->fixParseInfo();

    // 3. The siblings (children of the parent in the main alignment, minus the
    //    replaced genome, which was already handled in step 1).
    const vector<string> siblingNames = mainAlignment->getChildNames(dstParent->getName());
    for (vector<string>::const_iterator name = siblingNames.begin(); name != siblingNames.end(); ++name) {
        if (*name == genomeName) {
            continue;
        }
        Genome *dstSibling = mainAlignment->openGenome(*name);
        const Genome *srcSibling = topAlignment->openGenome(*name);
        srcSibling->copyTopDimensions(dstSibling);
        srcSibling->copyTopSegments(dstSibling);
        dstSibling->fixParseInfo();
    }
}
/**
 * Transfer the genome itself plus the "downward-facing" data from
 * bottomAlignment into mainAlignment.
 *
 * For genomeName this copies dimensions, sequence, bottom dimensions and
 * bottom segments (in that order). For each child of genomeName in the main
 * alignment it copies top dimensions and top segments. fixParseInfo() is
 * called on every destination genome after its copy.
 *
 * @param bottomAlignment alignment the data is read from
 * @param mainAlignment   alignment that is modified
 * @param genomeName      name of the genome being replaced; it is looked up by
 *                        this name in both alignments
 */
void copyFromBottomAlignment(AlignmentConstPtr bottomAlignment, AlignmentPtr mainAlignment, const string &genomeName) {
    // The replaced genome: whole-genome data first, then its bottom side.
    Genome *dstGenome = mainAlignment->openGenome(genomeName);
    const Genome *srcGenome = bottomAlignment->openGenome(genomeName);
    srcGenome->copyDimensions(dstGenome);
    srcGenome->copySequence(dstGenome);
    srcGenome->copyBottomDimensions(dstGenome);
    srcGenome->copyBottomSegments(dstGenome);
    dstGenome->fixParseInfo();

    // Each child: top side only.
    const vector<string> childNames = mainAlignment->getChildNames(genomeName);
    for (vector<string>::const_iterator name = childNames.begin(); name != childNames.end(); ++name) {
        Genome *dstChild = mainAlignment->openGenome(*name);
        const Genome *srcChild = bottomAlignment->openGenome(*name);
        srcChild->copyTopDimensions(dstChild);
        srcChild->copyTopSegments(dstChild);
        dstChild->fixParseInfo();
    }
}
int main(int argc, char *argv[]) { CLParserPtr optParser = initParser(); string inPath, botAlignmentPath, topAlignmentPath, parentName, insertName, childName, leafName; double upperBranchLength, leafBranchLength; bool noMarkAncestors; try { optParser->parseOptions(argc, argv); inPath = optParser->getArgument<string>("inFile"); botAlignmentPath = optParser->getArgument<string>("botAlignmentFile"); topAlignmentPath = optParser->getArgument<string>("topAlignmentFile"); parentName = optParser->getArgument<string>("parentName"); insertName = optParser->getArgument<string>("insertName"); childName = optParser->getArgument<string>("childName"); leafName = optParser->getArgument<string>("leafName"); upperBranchLength = optParser->getArgument<double>("upperBranchLength"); leafBranchLength = optParser->getArgument<double>("leafBranchLength"); noMarkAncestors = optParser->getFlag("noMarkAncestors"); } catch (exception &e) { optParser->printUsage(cerr); return 1; } AlignmentPtr mainAlignment = openHalAlignment(inPath, optParser); AlignmentConstPtr botAlignment = openHalAlignment(botAlignmentPath, optParser); AlignmentConstPtr topAlignment = openHalAlignment(topAlignmentPath, optParser); mainAlignment->insertGenome(insertName, parentName, childName, upperBranchLength); mainAlignment->addLeafGenome(leafName, insertName, leafBranchLength); // Insert the new intermediate node. 
Genome *insertGenome = mainAlignment->openGenome(insertName); const Genome *topInsertGenome = topAlignment->openGenome(insertName); const Genome *botInsertGenome = botAlignment->openGenome(insertName); topInsertGenome->copyDimensions(insertGenome); topInsertGenome->copyTopDimensions(insertGenome); botInsertGenome->copyBottomDimensions(insertGenome); topInsertGenome->copySequence(insertGenome); topInsertGenome->copyTopSegments(insertGenome); topInsertGenome->copyMetadata(insertGenome); botInsertGenome->copyBottomSegments(insertGenome); insertGenome->fixParseInfo(); // Copy the bottom segments for the parent genome from the top alignment. Genome *parentGenome = mainAlignment->openGenome(parentName); const Genome *botParentGenome = topAlignment->openGenome(parentName); botParentGenome->copyBottomDimensions(parentGenome); botParentGenome->copyBottomSegments(parentGenome); parentGenome->fixParseInfo(); // Fix the parent's other children as well. vector<string> allChildren = mainAlignment->getChildNames(parentName); for (size_t i = 0; i < allChildren.size(); i++) { if (allChildren[i] != insertName) { Genome *outGenome = mainAlignment->openGenome(allChildren[i]); const Genome *topSegmentsGenome = topAlignment->openGenome(allChildren[i]); topSegmentsGenome->copyTopDimensions(outGenome); topSegmentsGenome->copyTopSegments(outGenome); outGenome->fixParseInfo(); } } // Copy the top segments for the child genome from the bottom alignment. Genome *childGenome = mainAlignment->openGenome(childName); const Genome *topChildGenome = botAlignment->openGenome(childName); topChildGenome->copyTopDimensions(childGenome); topChildGenome->copyTopSegments(childGenome); childGenome->fixParseInfo(); // Copy the entire genome for the leaf from the bottom alignment. 
Genome *outLeafGenome = mainAlignment->openGenome(leafName); const Genome *inLeafGenome = botAlignment->openGenome(leafName); inLeafGenome->copy(outLeafGenome); if (!noMarkAncestors) { markAncestorsForUpdate(mainAlignment, insertName); } mainAlignment->close(); botAlignment->close(); topAlignment->close(); }
// Test copying when the sequences aren't in the same order. // // Create an alignment with "Sequence1" positions aligned to // "Sequence1" positions, and "Sequence2" to "Sequence2", but try // copying the segments to an alignment with "Sequence2" before // "Sequence1" in the ordering. void GenomeCopySegmentsWhenSequencesOutOfOrderTest::createCallBack(Alignment *alignment) { hal_size_t alignmentSize = alignment->getNumGenomes(); CuAssertTrue(_testCase, alignmentSize == 0); // Hacky: Need a different alignment to test copying the bottom // segments correctly. (the names of a node's children are used // when copying bottom segments, and two genomes can't have the same // name in the same alignment) _path = getTempFile(); _secondAlignment = AlignmentPtr(getTestAlignmentInstances(alignment->getStorageFormat(), _path, CREATE_ACCESS)); Genome *rootGenome = alignment->addRootGenome("root", 0); Genome *internalGenome = alignment->addLeafGenome("internal", "root", 0); Genome *leaf1Genome = alignment->addLeafGenome("leaf1", "root", 0); Genome *leaf2Genome = alignment->addLeafGenome("leaf2", "internal", 0); Genome *copyRootGenome = _secondAlignment->addRootGenome("root", 0); Genome *copyInternalGenome = _secondAlignment->addLeafGenome("internal", "root", 0); Genome *copyLeaf1Genome = _secondAlignment->addLeafGenome("leaf1", "root", 0); Genome *copyLeaf2Genome = _secondAlignment->addLeafGenome("leaf2", "internal", 0); vector<Sequence::Info> seqVec(2); seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17); rootGenome->setDimensions(seqVec); rootGenome->setString(randomString(rootGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 17, 17); internalGenome->setDimensions(seqVec); internalGenome->setString(randomString(internalGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0); seqVec[1] = Sequence::Info("Sequence2", 170, 
17, 0); leaf1Genome->setDimensions(seqVec); leaf1Genome->setString(randomString(leaf1Genome->getSequenceLength())); leaf2Genome->setDimensions(seqVec); leaf2Genome->setString(randomString(leaf2Genome->getSequenceLength())); setTopSegments(internalGenome, 10); setTopSegments(leaf1Genome, 10); setTopSegments(leaf2Genome, 10); setBottomSegments(rootGenome, 10); setBottomSegments(internalGenome, 10); rootGenome->fixParseInfo(); internalGenome->fixParseInfo(); leaf1Genome->fixParseInfo(); leaf2Genome->fixParseInfo(); seqVec[0] = Sequence::Info("Sequence1", 130, 0, 13); seqVec[1] = Sequence::Info("Sequence2", 170, 0, 17); copyRootGenome->setDimensions(seqVec); copyRootGenome->setString(randomString(copyRootGenome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence1", 130, 13, 0); seqVec[1] = Sequence::Info("Sequence2", 170, 17, 0); copyLeaf1Genome->setDimensions(seqVec); copyLeaf2Genome->setDimensions(seqVec); copyLeaf1Genome->setString(randomString(copyLeaf1Genome->getSequenceLength())); copyLeaf2Genome->setString(randomString(copyLeaf2Genome->getSequenceLength())); seqVec[0] = Sequence::Info("Sequence2", 170, 17, 17); seqVec[1] = Sequence::Info("Sequence1", 130, 13, 13); copyInternalGenome->setDimensions(seqVec); copyInternalGenome->setString(randomString(copyInternalGenome->getSequenceLength())); rootGenome->copyBottomDimensions(copyRootGenome); rootGenome->copyBottomSegments(copyRootGenome); copyRootGenome->fixParseInfo(); internalGenome->copyBottomDimensions(copyInternalGenome); internalGenome->copyBottomSegments(copyInternalGenome); internalGenome->copyTopDimensions(copyInternalGenome); internalGenome->copyTopSegments(copyInternalGenome); copyInternalGenome->fixParseInfo(); leaf1Genome->copyTopDimensions(copyLeaf1Genome); leaf1Genome->copyTopSegments(copyLeaf1Genome); copyLeaf1Genome->fixParseInfo(); leaf2Genome->copyTopDimensions(copyLeaf2Genome); leaf2Genome->copyTopSegments(copyLeaf2Genome); copyLeaf2Genome->fixParseInfo(); _secondAlignment->close(); }
// Build the fixture alignment for the extra-paralogs mapping test.
//
// Tree: root -> parent -> { grandChild1, grandChild2 }.
// The intent is that every segment of grandChild1 coalesces with the first
// segment of grandChild2, but only when the root is used as the coalescence
// limit; with a lower limit just the first segments map to one another.
void MappedSegmentMapExtraParalogsTest::createCallBack(AlignmentPtr alignment) {
    typedef pair<hal_size_t, bool> ChildLink; // (child segment index, flag)

    Genome *rootGenome = alignment->addRootGenome("root");
    Genome *parentGenome = alignment->addLeafGenome("parent", "root", 1);
    Genome *firstGrandChild = alignment->addLeafGenome("grandChild1", "parent", 1);
    Genome *secondGrandChild = alignment->addLeafGenome("grandChild2", "parent", 1);

    // One sequence per genome; only the length and segment counts differ.
    vector<Sequence::Info> dims(1);
    dims[0] = Sequence::Info("Sequence", 3, 0, 1);
    rootGenome->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 3);
    parentGenome->setDimensions(dims);
    dims[0] = Sequence::Info("Sequence", 9, 3, 0);
    firstGrandChild->setDimensions(dims);
    secondGrandChild->setDimensions(dims);

    rootGenome->setString("CCC");
    parentGenome->setString("CCCTACGTG");
    firstGrandChild->setString("CCCTACGTG");
    secondGrandChild->setString("CCCTACGTG");

    // The same two scratch structs are reused for every segment below, so
    // whatever set() leaves untouched carries over exactly as it would in a
    // straight-line sequence of calls.
    BottomSegmentStruct bottomSpec;
    TopSegmentStruct topSpec;

    // root: a single bottom segment with one child entry (index 0).
    BottomSegmentIteratorPtr bottomIt = rootGenome->getBottomSegmentIterator();
    bottomSpec.set(0, 3);
    bottomSpec._children.push_back(ChildLink(0, false));
    bottomSpec.applyTo(bottomIt);

    // parent, top side: three segments, all with parent index 0; the last
    // argument runs 1, 2, 0 across the three segments.
    TopSegmentIteratorPtr topIt = parentGenome->getTopSegmentIterator();
    topSpec.set(0, 3, 0, false, NULL_INDEX, 1);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(3, 3, 0, false, NULL_INDEX, 2);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(6, 3, 0, false, NULL_INDEX, 0);
    topSpec.applyTo(topIt);

    // parent, bottom side: each segment has two child entries (one per
    // grandchild). Only the first segment has a non-null second entry.
    bottomIt = parentGenome->getBottomSegmentIterator();
    bottomSpec.set(0, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(0, true));
    bottomSpec._children.push_back(ChildLink(0, false));
    bottomSpec.applyTo(bottomIt);
    bottomIt->toRight();
    bottomSpec.set(3, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(1, true));
    bottomSpec._children.push_back(ChildLink(NULL_INDEX, true));
    bottomSpec.applyTo(bottomIt);
    bottomIt->toRight();
    bottomSpec.set(6, 3);
    bottomSpec._children.clear();
    bottomSpec._children.push_back(ChildLink(2, true));
    bottomSpec._children.push_back(ChildLink(NULL_INDEX, false));
    bottomSpec.applyTo(bottomIt);

    // grandChild1: segments 0..2 point at parent segments 0..2.
    topIt = firstGrandChild->getTopSegmentIterator();
    topSpec.set(0, 3, 0, true);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(3, 3, 1, true);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(6, 3, 2, true);
    topSpec.applyTo(topIt);

    // grandChild2: only the first segment has a parent; the rest are null.
    topIt = secondGrandChild->getTopSegmentIterator();
    topSpec.set(0, 3, 0, false);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(3, 3, NULL_INDEX, true);
    topSpec.applyTo(topIt);
    topIt->toRight();
    topSpec.set(6, 3, NULL_INDEX, false);
    topSpec.applyTo(topIt);

    parentGenome->fixParseInfo();
}