/**
 * Builds a fresh two-sided polytomy out of the subtree rooted at nadNode.
 *
 * Let s = lcaMapping[nadNode]. Every descendant n of nadNode whose own mapping
 * differs from s while its parent's mapping equals s is collected. It goes to
 * the "left" group when lcaMapping[n] has s->GetChild(0) as an ancestor, and to
 * the "right" group otherwise. Each collected subtree is copied (together with
 * its mapping entries) under the matching child of a newly allocated root.
 *
 * @param nadNode      root of the gene subtree to rebuild
 * @param speciesTree  species tree, forwarded to GetSingleNodeLCAMapping
 * @param lcaMapping   gene node -> species node mapping covering nadNode's subtree
 * @return pair (new root, mapping for the nodes of the new tree). The new root
 *         is mapped to s; a side node is only mapped when it holds more than
 *         one child. DeleteSingleChildDescendants() is applied before returning.
 */
pair<Node*, unordered_map<Node*, Node*> > PolySolverNAD::PolytomizeNAD(Node* nadNode, Node* speciesTree, unordered_map<Node*, Node*> lcaMapping)
{
    Node* s = lcaMapping[nadNode];
    set<Node*> leftSubtrees;
    set<Node*> rightSubtrees;

    //TODO : there should be a way not to iterate uselessly into a taken subtree (preorder traversal that we stop)
    TreeIterator* it = nadNode->GetPostOrderIterator();
    while (Node* n = it->next())
    {
        if (n == nadNode)
            continue;

        //only keep the maximal subtrees: n itself is mapped elsewhere than s, but its parent is mapped to s
        if (lcaMapping[n] == s)
            continue;
        if (lcaMapping[n->GetParent()] != s)
            continue;

        //anything not below the first child of s is taken to be below the second one
        const bool goesLeft = lcaMapping[n]->HasAncestor(s->GetChild(0));
        (goesLeft ? leftSubtrees : rightSubtrees).insert(n);
    }
    nadNode->CloseIterator(it);

    Node* newRoot = new Node(false);
    Node* sides[2];
    sides[0] = newRoot->AddChild();
    sides[1] = newRoot->AddChild();
    set<Node*>* groups[2] = { &leftSubtrees, &rightSubtrees };

    //copy the left group first, then the right group, filling newMapping along the way
    unordered_map<Node*, Node*> newMapping;
    for (int side = 0; side < 2; ++side)
    {
        for (set<Node*>::iterator sub = groups[side]->begin(); sub != groups[side]->end(); ++sub)
        {
            Node* copy = GeneSpeciesTreeUtil::Instance()->CopyTreeWithNodeMapping(*sub, lcaMapping, newMapping);
            sides[side]->AddSubTree(copy);
        }
    }

    newMapping[newRoot] = s;

    //a side holding at most one child is not given a mapping of its own
    for (int side = 0; side < 2; ++side)
    {
        if (sides[side]->GetNbChildren() > 1)
        {
            newMapping[sides[side]] = GeneSpeciesTreeUtil::Instance()->GetSingleNodeLCAMapping(sides[side], speciesTree, newMapping);
        }
    }

    newRoot->DeleteSingleChildDescendants();

    return make_pair(newRoot, newMapping);
}
/**
 * Convenience entry point: delegates to the one-argument SetAsRootInCopy with
 * NULL, then calls DeleteSingleChildDescendants() on the tree that call
 * returned, and hands that tree back.
 *
 * @return the node returned by SetAsRootInCopy(NULL), after clean-up
 */
Node* Node::SetAsRootInCopy()
{
    Node* rerooted = this->SetAsRootInCopy(NULL);

    rerooted->DeleteSingleChildDescendants();

    return rerooted;
}
/**
 * Generates a random instance made of a polytomy of k gene subtrees and a species tree.
 *
 * Steps:
 *  1. create a star species tree with about s_size_factor * k leaves labeled "S<i>"
 *     (s_size_factor is drawn with rand()) and binarize it randomly;
 *  2. for each of the k genes, pick a random internal species node as its lca and
 *     hang one private species ("XL<i>" / "XR<i>") under each child of that lca,
 *     with a matching gene leaf, so that the gene is mapped to the chosen lca;
 *  3. for each pair of genes whose lcas are ancestor-related, with probability
 *     AD_prob %, add a species shared by both genes ("AD_<g1>_<g2>");
 *  4. binarize everything, assemble the gene subtrees under a new root, and
 *     re-parse the species tree from its Newick string.
 *
 * @param k        number of gene subtrees to generate
 * @param verbose  when > 0, the species tree ("S=") and the polytomy ("G=") are printed to cout
 * @return pair (polytomy root, species tree); neither tree is freed by this function
 */
pair<Node*, Node*> PolySolverNAD::GetRandomPolytomy(int k, int verbose)
{
    Node* speciesTree = new Node(false);

    const double s_size_factor = 2.5 * (double)(rand() % 1000)/1000.0 + 0.5; //between 0.5 and 3

    //Every picked lca must have a child 0 AND a child 1 (see below), so as soon as
    //one gene is requested the species tree needs at least two leaves. Small values
    //of k combined with a small size factor used to produce a single leaf.
    const int minSpecies = (k > 0) ? 2 : 0;
    for (int i = 0; i < s_size_factor*k || i < minSpecies; i++)
    {
        Node* c = speciesTree->AddChild();
        c->SetLabel("S" + Util::ToString(i));
    }

    speciesTree->BinarizeRandomly();

    //get an ordering of the internal nodes...this will let us pick one at random
    vector<Node*> internalNodes;
    TreeIterator* it = speciesTree->GetPostOrderIterator(false);
    while (Node* s = it->next())
    {
        if (!s->IsLeaf())
        {
            internalNodes.push_back(s);
        }
    }
    speciesTree->CloseIterator(it);

    //generate k gene subtrees
    unordered_map<Node*, Node*> lcaMapping;
    vector<Node*> forest;
    map<Node*, Node*> geneLeftSpecies;  //gene -> the "S" species that received its XL species

    for (int i = 0; i < k; i++)
    {
        Node* g = new Node(false);
        g->SetLabel("G" + Util::ToString(i));

        //pick an lca for g at random
        Node* lca = internalNodes[rand() % internalNodes.size()];
        lca->SetLabel(lca->GetLabel() + "_" + Util::ToString(i));
        lcaMapping[g] = lca;

        //add something left and right to enforce s(g) = lca
        //by adding a species specific to g on both sides (side 0 = left, side 1 = right)
        for (int side = 0; side < 2; side++)
        {
            const string privateLabel = string(side == 0 ? "XL" : "XR") + Util::ToString(i);

            TreeIterator* itSide = lca->GetChild(side)->GetPostOrderIterator();
            while (Node* s = itSide->next())
            {
                string slbl = s->GetLabel();
                if (slbl.empty() || slbl[0] != 'S')
                    continue;

                //got an original species node: give it a species private to g
                Node* sg = s->AddChild();
                sg->SetLabel(privateLabel);

                Node* gs = g->AddChild();
                gs->SetLabel(privateLabel);

                lcaMapping[gs] = sg;

                if (side == 0)
                {
                    geneLeftSpecies[g] = s;
                }

                //the tree was just modified: stop here instead of advancing the iterator any further
                break;
            }
            lca->CloseIterator(itSide);
        }

        forest.push_back(g);
    }

    const int AD_prob = rand() % 50 + 25; //between 25-75% chances of having a dup

    //ok, we have a forest.  Now, everything is either S or NAD (no species are shared since we created one specific to each gene)
    //so here we add a couple AD
    for (size_t i = 0; i < forest.size(); i++)
    {
        Node* g1 = forest[i];
        Node* s1 = lcaMapping[g1];

        for (size_t j = i + 1; j < forest.size(); j++)
        {
            Node* g2 = forest[j];
            Node* s2 = lcaMapping[g2];

            //they're related...make them AD if we're lucky enough
            if (s1->HasAncestor(s2) || s2->HasAncestor(s1))
            {
                int r = rand() % 100;

                //add a species near the g1left species s.t. g1 and g2 will share a gene of this species
                if (r < AD_prob)
                {
                    //use the left species of g1 when s1 has s2 as an ancestor, the one of g2 otherwise
                    Node* s_to_add_to = geneLeftSpecies[g1];
                    if (!s1->HasAncestor(s2))
                        s_to_add_to = geneLeftSpecies[g2];

                    Node* dspecies = s_to_add_to->AddChild();
                    dspecies->SetLabel("AD_" + g1->GetLabel() + "_" + g2->GetLabel());

                    Node* newg1 = g1->AddChild();
                    newg1->SetLabel(dspecies->GetLabel());
                    lcaMapping[newg1] = dspecies;

                    Node* newg2 = g2->AddChild();
                    newg2->SetLabel(dspecies->GetLabel());
                    lcaMapping[newg2] = dspecies;
                }
            }
        }
    }

    //species were added under former leaves, so binarize S again and clean it up
    speciesTree->BinarizeRandomly();
    speciesTree->DeleteSingleChildDescendants();

    string sstr = NewickLex::ToNewickString(speciesTree);
    if (verbose > 0)
        cout<<"S="<<sstr<<endl;

    Node* poly = new Node(false);
    for (size_t i = 0; i < forest.size(); i++)
    {
        forest[i]->BinarizeRandomly();
        poly->AddSubTree(forest[i]);
    }

    string gstr = NewickLex::ToNewickString(poly);
    if (verbose > 0)
        cout<<"G="<<gstr<<endl;

    //we have to recreate the species tree, or later on lca mapping will get messed up FOR UNKNOWN REASONS !
    //(the species tree has not been touched since sstr was computed, so sstr is reused)
    delete speciesTree;
    speciesTree = NewickLex::ParseNewickString(sstr, true);

    return make_pair(poly, speciesTree);
}
/**
 * Corrects one given node of a gene tree: the subtree rooted at n is replaced, in a
 * copy of geneTree, by the two-sided polytomy built by PolytomizeNAD, and
 * SolvePolytomy is then called on each side.
 *
 * The original geneTree is left as it was: n only carries a temporary label
 * while the tree is being copied, so that its counterpart can be located in the copy.
 *
 * @param geneTree                  gene tree containing n
 * @param speciesTree               species tree
 * @param geneLeavesSpeciesMapping  gene leaf -> species mapping, forwarded to GetLCAMapping
 * @param n                         node of geneTree to correct
 * @return info.correction holds the corrected copy, info.nadCladeGenes the labels of the
 *         leaves under the corrected node, and info.firstPolySize / info.secondPolySize
 *         the child counts of the two sides before they are solved. When n has no
 *         counterpart in the copy, info.correction is NULL and the other fields are not set.
 */
PolySolverCorrectionInfo PolySolverNAD::CorrectNodeByMultifurcation(Node* geneTree, Node* speciesTree, unordered_map<Node*, Node*> geneLeavesSpeciesMapping, Node* n)
{
    //TODO : code copied from above
    unordered_map<Node*, Node*> oldlcaMapping = GeneSpeciesTreeUtil::Instance()->GetLCAMapping(geneTree, speciesTree, geneLeavesSpeciesMapping);
    unordered_map<Node*, Node*> lcaMapping;

    //here we'll copy the original gene tree and manage to find the node of interest in this copy
    string prevLabel = n->GetLabel();
    string tempLabel = "temp-label-no-one-else-should-use";
    n->SetLabel(tempLabel);

    Node* geneTreeCopy = GeneSpeciesTreeUtil::Instance()->CopyTreeWithNodeMapping(geneTree, oldlcaMapping, lcaMapping);

    n->SetLabel(prevLabel);

    //find the node of interest
    Node* node_to_correct = NULL;
    TreeIterator* it = geneTreeCopy->GetPostOrderIterator();
    while (Node* ncopy = it->next())
    {
        if (ncopy->GetLabel() == tempLabel)
        {
            node_to_correct = ncopy;
            node_to_correct->SetLabel(prevLabel);
            break;
        }
    }
    geneTreeCopy->CloseIterator(it);

    //n is not part of geneTree: there is nothing to correct. Report it with a NULL
    //correction instead of dereferencing a null pointer below.
    if (node_to_correct == NULL)
    {
        delete geneTreeCopy;

        PolySolverCorrectionInfo notFound;
        notFound.correction = NULL;
        return notFound;
    }

    //the leaf labels must be gathered now, the subtree is deleted further down
    vector<string> leafLabels;
    vector<Node*> n_leaves = node_to_correct->GetLeafVector();
    for (size_t i = 0; i < n_leaves.size(); i++)
    {
        leafLabels.push_back(n_leaves[i]->GetLabel());
    }

    pair<Node*, unordered_map<Node*, Node*> > polytomizedWithMapping = PolytomizeNAD(node_to_correct, speciesTree, lcaMapping);
    Node* polytomized = polytomizedWithMapping.first;

    //replace the subtree that just got polytomized
    if (!node_to_correct->IsRoot())
    {
        Node* parent = node_to_correct->GetParent();
        parent->RemoveChild(node_to_correct);
        parent->AddSubTree(polytomized);
        delete node_to_correct;
    }
    else
    {
        //the corrected node is the whole copy: the polytomy becomes the new tree
        delete geneTreeCopy;
        geneTreeCopy = polytomized;
    }

    PolySolverCorrectionInfo info;
    info.nadCladeGenes = leafLabels;
    info.firstPolySize = polytomized->GetChild(0)->GetNbChildren();
    info.secondPolySize = polytomized->GetChild(1)->GetNbChildren();

    this->SolvePolytomy(polytomized->GetChild(0), speciesTree, polytomizedWithMapping.second);
    this->SolvePolytomy(polytomized->GetChild(1), speciesTree, polytomizedWithMapping.second);

    geneTreeCopy->DeleteSingleChildDescendants();

    info.correction = geneTreeCopy;

    return info;
}
/**
 * Looks, in preorder, for the first internal node of a copy of geneTree that
 *  - is mapped to the same species node as one of its first two children, and
 *  - whose first two children have no species in common (HaveCommonSpecies is false),
 * then replaces that node by the polytomy built by PolytomizeNAD and calls
 * SolvePolytomy on both sides. The input geneTree itself is not modified.
 *
 * @param geneTree                  gene tree to inspect
 * @param speciesTree               species tree
 * @param geneLeavesSpeciesMapping  gene leaf -> species mapping, forwarded to GetLCAMapping
 * @return when such a node exists: info.correction is the corrected copy,
 *         info.nadCladeGenes the leaf labels under the corrected node and
 *         info.firstPolySize / info.secondPolySize the child counts of both sides
 *         before solving. Otherwise the copy is deleted and only
 *         info.correction (NULL) is set.
 */
PolySolverCorrectionInfo PolySolverNAD::CorrectHighestNAD(Node* geneTree, Node* speciesTree, unordered_map<Node*, Node*> geneLeavesSpeciesMapping)
{
    unordered_map<Node*, Node*> oldlcaMapping = GeneSpeciesTreeUtil::Instance()->GetLCAMapping(geneTree, speciesTree, geneLeavesSpeciesMapping);
    unordered_map<Node*, Node*> lcaMapping;
    Node* geneTreeCopy = GeneSpeciesTreeUtil::Instance()->CopyTreeWithNodeMapping(geneTree, oldlcaMapping, lcaMapping);

    //GeneSpeciesTreeUtil::Instance()->PrintMapping(geneTreeCopy, lcaMapping);

    TreeIterator* it = geneTreeCopy->GetPreOrderIterator();
    while (Node* n = it->next())
    {
        if (n->IsLeaf())
            continue;

        //first check if it's a duplication, lca mapping classic rule
        const bool mappedLikeAChild = lcaMapping[n->GetChild(0)] == lcaMapping[n] || lcaMapping[n->GetChild(1)] == lcaMapping[n];
        if (!mappedLikeAChild)
            continue;

        //children sharing a species: not the kind of node we are after
        if (GeneSpeciesTreeUtil::Instance()->HaveCommonSpecies(n->GetChild(0), n->GetChild(1), lcaMapping))
            continue;

        //there it is ! the highest NAD
        //its leaf labels are gathered first, since the subtree is deleted below
        vector<string> leafLabels;
        vector<Node*> n_leaves = n->GetLeafVector();
        for (vector<Node*>::iterator leaf = n_leaves.begin(); leaf != n_leaves.end(); ++leaf)
        {
            leafLabels.push_back((*leaf)->GetLabel());
        }

        pair<Node*, unordered_map<Node*, Node*> > polytomizedWithMapping = PolytomizeNAD(n, speciesTree, lcaMapping);
        Node* polytomized = polytomizedWithMapping.first;

        //HERE we do some not so clean stuff...because whatever we do, we'll exit this function
        //the iterator is closed before the tree it walks on gets modified
        geneTreeCopy->CloseIterator(it);

        //replace the subtree that just got polytomized
        if (n->IsRoot())
        {
            delete geneTreeCopy;
            geneTreeCopy = polytomized;
        }
        else
        {
            Node* parent = n->GetParent();
            parent->RemoveChild(n);
            parent->AddSubTree(polytomized);
            delete n;
        }

        //cout<<"COPY AFTER = "<<NewickLex::ToNewickString(geneTreeCopy)<<endl;

        PolySolverCorrectionInfo info;
        info.nadCladeGenes = leafLabels;
        info.firstPolySize = polytomized->GetChild(0)->GetNbChildren();
        info.secondPolySize = polytomized->GetChild(1)->GetNbChildren();

        this->SolvePolytomy(polytomized->GetChild(0), speciesTree, polytomizedWithMapping.second);
        this->SolvePolytomy(polytomized->GetChild(1), speciesTree, polytomizedWithMapping.second);

        //cout<<"CORRECTED = "<<NewickLex::ToNewickString(geneTreeCopy)<<endl;

        geneTreeCopy->DeleteSingleChildDescendants();

        info.correction = geneTreeCopy;
        return info;
    }
    geneTreeCopy->CloseIterator(it);

    //if we got here, we found no NAD, and since we didn't do anything we return NULL
    delete geneTreeCopy;

    PolySolverCorrectionInfo info;
    info.correction = NULL;
    return info;
}