Esempio n. 1
0
/**
 * Builds a corrected copy of geneTree in which the subtree rooted at the copy of n is
 * replaced by the result of PolytomizeNAD, whose first two children are then each passed
 * to SolvePolytomy.
 *
 * n is temporarily relabelled so that its counterpart can be located in the copy; the
 * original label is restored on both n and its copy.
 *
 * @param geneTree                 tree that gets copied; expected to contain n
 * @param speciesTree              species tree forwarded to GetLCAMapping, PolytomizeNAD and SolvePolytomy
 * @param geneLeavesSpeciesMapping mapping forwarded as-is to GetLCAMapping (presumably gene leaf -> species)
 * @param n                        node whose subtree must be corrected
 * @return info.correction  : root of the corrected copy (NOTE(review): presumably owned by the caller),
 *         info.nadCladeGenes : labels of the leaves found under the copy of n,
 *         info.firstPolySize / info.secondPolySize : child counts of the two children of the
 *         polytomized node, taken before SolvePolytomy runs on them.
 *         If no node of the copy carries the temporary label, info.correction is NULL
 *         (same "nothing done" convention as CorrectHighestNAD) and the other fields are
 *         left as default-constructed.
 */
PolySolverCorrectionInfo PolySolverNAD::CorrectNodeByMultifurcation(Node* geneTree, Node* speciesTree, unordered_map<Node*, Node*> geneLeavesSpeciesMapping, Node* n)
{
    //TODO : most of this duplicates CorrectHighestNAD

    unordered_map<Node*, Node*> oldlcaMapping = GeneSpeciesTreeUtil::Instance()->GetLCAMapping(geneTree, speciesTree, geneLeavesSpeciesMapping);
    unordered_map<Node*, Node*> lcaMapping;

    //here we'll copy the original gene tree and manage to find the node of interest in this copy:
    //n gets a label nobody else should have, the tree is copied, then n gets its label back
    string prevLabel = n->GetLabel();
    string tempLabel = "temp-label-no-one-else-should-use";
    n->SetLabel(tempLabel);
    Node* geneTreeCopy = GeneSpeciesTreeUtil::Instance()->CopyTreeWithNodeMapping(geneTree, oldlcaMapping, lcaMapping);
    n->SetLabel(prevLabel);

    //find the node of interest, i.e. the one that carries the temporary label in the copy
    Node* node_to_correct = NULL;
    TreeIterator* it = geneTreeCopy->GetPostOrderIterator();
    while (Node* ncopy = it->next())
    {
        if (ncopy->GetLabel() == tempLabel)
        {
            node_to_correct = ncopy;
            node_to_correct->SetLabel(prevLabel);
            break;
        }
    }
    geneTreeCopy->CloseIterator(it);

    //no copy of n was found (e.g. n does not belong to geneTree) : there is nothing to
    //correct, so drop the copy and report it the way CorrectHighestNAD does
    if (node_to_correct == NULL)
    {
        delete geneTreeCopy;

        PolySolverCorrectionInfo notFound;
        notFound.correction = NULL;
        return notFound;
    }

    //remember the leaf labels now, the subtree is about to be replaced
    vector<string> leafLabels;
    vector<Node*> n_leaves = node_to_correct->GetLeafVector();
    leafLabels.reserve(n_leaves.size());
    for (size_t i = 0; i < n_leaves.size(); i++)
    {
        leafLabels.push_back(n_leaves[i]->GetLabel());
    }

    pair<Node*, unordered_map<Node*, Node*> > polytomizedWithMapping = PolytomizeNAD(node_to_correct, speciesTree, lcaMapping);
    Node* polytomized = polytomizedWithMapping.first;


    //replace the subtree that just got polytomized
    if (!node_to_correct->IsRoot())
    {
        Node* parent = node_to_correct->GetParent();
        parent->RemoveChild(node_to_correct);
        parent->AddSubTree(polytomized);
        delete node_to_correct;
    }
    else
    {
        //the node to correct is the whole copy : the polytomized tree becomes the result
        delete geneTreeCopy;
        geneTreeCopy = polytomized;
    }


    PolySolverCorrectionInfo info;
    info.nadCladeGenes = leafLabels;
    //sizes are recorded before the polytomies get solved
    info.firstPolySize = polytomized->GetChild(0)->GetNbChildren();
    info.secondPolySize = polytomized->GetChild(1)->GetNbChildren();
    this->SolvePolytomy(polytomized->GetChild(0), speciesTree, polytomizedWithMapping.second);
    this->SolvePolytomy(polytomized->GetChild(1), speciesTree, polytomizedWithMapping.second);


    geneTreeCopy->DeleteSingleChildDescendants();

    info.correction = geneTreeCopy;

    return info;
}
Esempio n. 2
0
/**
 * Generates a random test instance: a species tree and a gene "polytomy" whose root has
 * k gene subtrees as children.
 *
 * Steps, all driven by rand() (no seeding is done here):
 *  1. create a species tree with about s_size_factor*k leaves labelled "S<i>" and binarize it;
 *  2. for each of the k genes "G<i>", pick a random internal species node as its lca and
 *     hang one gene-specific species ("XL<i>" / "XR<i>") in each of the two child subtrees
 *     of that lca, with a matching gene leaf under the gene;
 *  3. for each pair of genes whose lca nodes are in an ancestor relation, with probability
 *     AD_prob % add a species "AD_<g1>_<g2>" and give both genes a leaf with that label;
 *  4. binarize everything, then rebuild the species tree from its Newick string.
 *
 * @param k       number of gene subtrees to generate
 * @param verbose when > 0, the species tree ("S=...") and the gene tree ("G=...") are
 *                printed to cout in Newick form
 * @return pair (gene polytomy root, species tree). Both are allocated here and not freed;
 *         NOTE(review): the caller presumably owns them - confirm.
 */
pair<Node*, Node*> PolySolverNAD::GetRandomPolytomy(int k, int verbose)
{
    Node* speciesTree = new Node(false);

    double s_size_factor = 2.5 * (double)(rand() % 1000)/1000.0 + 0.5;  //between 0.5 and 3

    //NOTE(review): with k == 1 and a factor <= 1 only one species leaf is created, so the
    //lca picked below may have no second child - confirm how GetChild(1) behaves then
    for (int i = 0; i < s_size_factor*k; i++)
    {
        Node* c = speciesTree->AddChild();
        c->SetLabel("S" + Util::ToString(i));
    }

    speciesTree->BinarizeRandomly();

    //get an ordering of the internal nodes...this will let us pick one at random
    vector<Node*> internalNodes;

    TreeIterator* it = speciesTree->GetPostOrderIterator(false);
    while (Node* s = it->next())
    {
        if (!s->IsLeaf())
        {
            internalNodes.push_back(s);
        }
    }
    speciesTree->CloseIterator(it);

    //generate k gene subtrees
    unordered_map<Node*, Node*> lcaMapping;     //gene node -> species node
    vector<Node*> forest;                       //the k gene subtrees, in creation order
    map<Node*, Node*> geneLeftSpecies;          //gene -> species node that received its "XL" child

    for (int i = 0; i < k; i++)
    {
        Node* g = new Node(false);
        g->SetLabel("G" + Util::ToString(i));

        //pick an lca for g at random
        Node* lca = internalNodes[rand() % internalNodes.size()];
        lca->SetLabel(lca->GetLabel() + "_" + Util::ToString(i));
        lcaMapping[g] = lca;

        //add something left and right to enforce s(g) = lca
        //by adding a species specific to g on both sides
        TreeIterator* itLeft = lca->GetChild(0)->GetPostOrderIterator();
        while (Node* s = itLeft->next())
        {
            //only nodes created as original species leaves have a label starting with 'S'
            //(they may already have received children in a previous round)
            string slbl = s->GetLabel();
            if (slbl.empty() || slbl[0] != 'S')
                continue;

            Node* sg = s->AddChild();
            sg->SetLabel("XL" + Util::ToString(i));

            Node* gs = g->AddChild();
            gs->SetLabel("XL" + Util::ToString(i));

            lcaMapping[gs] = sg;
            geneLeftSpecies[g] = s;

            //the first match is the only one we need
            break;
        }
        lca->CloseIterator(itLeft);


        TreeIterator* itRight = lca->GetChild(1)->GetPostOrderIterator();
        while (Node* s = itRight->next())
        {
            string slbl = s->GetLabel();
            if (slbl.empty() || slbl[0] != 'S')
                continue;

            Node* sg = s->AddChild();
            sg->SetLabel("XR" + Util::ToString(i));

            Node* gs = g->AddChild();
            gs->SetLabel("XR" + Util::ToString(i));

            lcaMapping[gs] = sg;

            break;
        }
        lca->CloseIterator(itRight);

        forest.push_back(g);
    }

    int AD_prob = rand() % 50 + 25; //between 25-75% chances of having a dup

    //ok, we have a forest.  Now, everything is either S or NAD (no species are shared since we created one specific to each gene)
    //so here we add a couple AD
    for (size_t i = 0; i < forest.size(); i++)
    {
        Node* g1 = forest[i];
        Node* s1 = lcaMapping[g1];
        for (size_t j = i + 1; j < forest.size(); j++)
        {
            Node* g2 = forest[j];
            Node* s2 = lcaMapping[g2];

            //they're related...make them AD if we're lucky enough
            if (s1->HasAncestor(s2) || s2->HasAncestor(s1))
            {
                int r = rand() % 100;

                //add a species near the g1left species s.t. g1 and g2 will share a gene of this species
                if (r < AD_prob)
                {
                    //use g1's left species when s1 has s2 as ancestor, g2's otherwise
                    Node* s_to_add_to = geneLeftSpecies[g1];
                    if (!s1->HasAncestor(s2))
                        s_to_add_to = geneLeftSpecies[g2];

                    Node* dspecies = s_to_add_to->AddChild();
                    dspecies->SetLabel("AD_" + g1->GetLabel() + "_" + g2->GetLabel());

                    Node* newg1 = g1->AddChild();
                    newg1->SetLabel(dspecies->GetLabel());
                    lcaMapping[newg1] = dspecies;

                    Node* newg2 = g2->AddChild();
                    newg2->SetLabel(dspecies->GetLabel());
                    lcaMapping[newg2] = dspecies;
                }
            }
        }
    }



    //species nodes received extra children above, so binarize S again
    speciesTree->BinarizeRandomly();
    speciesTree->DeleteSingleChildDescendants();

    string sstr = NewickLex::ToNewickString(speciesTree);
    if (verbose > 0)
        cout<<"S="<<sstr<<endl;

    //the gene tree is a single root having every (binarized) gene subtree as a child
    Node* poly = new Node(false);
    for (size_t i = 0; i < forest.size(); i++)
    {
        forest[i]->BinarizeRandomly();

        poly->AddSubTree(forest[i]);
    }

    string gstr = NewickLex::ToNewickString(poly);
    if (verbose > 0)
        cout<<"G="<<gstr<<endl;


    //we have to recreate the species tree, or later on lca mapping will get messed up FOR UNKNOWN REASONS !
    string spNewick = NewickLex::ToNewickString(speciesTree);
    delete speciesTree;

    speciesTree = NewickLex::ParseNewickString(spNewick, true);

    return make_pair(poly, speciesTree);
}
Esempio n. 3
0
/**
 * Works on a copy of geneTree: walks it in pre-order and corrects the first node that
 *  - is not a leaf,
 *  - has one of its first two children mapped to the same species node as itself
 *    (classic lca mapping duplication rule), and
 *  - whose first two children have no common species according to HaveCommonSpecies.
 * That node is replaced by the result of PolytomizeNAD, whose first two children are
 * then each passed to SolvePolytomy.
 *
 * @param geneTree                 tree that gets copied and searched
 * @param speciesTree              species tree forwarded to GetLCAMapping, PolytomizeNAD and SolvePolytomy
 * @param geneLeavesSpeciesMapping mapping forwarded as-is to GetLCAMapping (presumably gene leaf -> species)
 * @return info.correction is the root of the corrected copy, info.nadCladeGenes the leaf
 *         labels under the corrected node, info.firstPolySize / info.secondPolySize the
 *         child counts of the two polytomies before they get solved.
 *         When no node qualifies, the copy is deleted and only info.correction (NULL) is set.
 */
PolySolverCorrectionInfo PolySolverNAD::CorrectHighestNAD(Node* geneTree, Node* speciesTree, unordered_map<Node*, Node*> geneLeavesSpeciesMapping)
{
    unordered_map<Node*, Node*> oldlcaMapping = GeneSpeciesTreeUtil::Instance()->GetLCAMapping(geneTree, speciesTree, geneLeavesSpeciesMapping);
    unordered_map<Node*, Node*> lcaMapping;

    Node* geneTreeCopy = GeneSpeciesTreeUtil::Instance()->CopyTreeWithNodeMapping(geneTree, oldlcaMapping, lcaMapping);

    //filled in as soon as the highest NAD is met
    Node* nadNode = NULL;
    vector<string> cladeLabels;
    pair<Node*, unordered_map<Node*, Node*> > polytomizedWithMapping;

    TreeIterator* walker = geneTreeCopy->GetPreOrderIterator();
    while (Node* current = walker->next())
    {
        if (current->IsLeaf())
            continue;

        //duplication test : a child shares the species of its parent
        const bool mapsLikeDuplication =
            lcaMapping[current->GetChild(0)] == lcaMapping[current] || lcaMapping[current->GetChild(1)] == lcaMapping[current];
        if (!mapsLikeDuplication)
            continue;

        //a duplication whose children share a species is not the one we are looking for
        if (GeneSpeciesTreeUtil::Instance()->HaveCommonSpecies(current->GetChild(0), current->GetChild(1), lcaMapping))
            continue;

        //pre-order guarantees this is the first qualifying node : collect its leaves...
        vector<Node*> cladeLeaves = current->GetLeafVector();
        for (vector<Node*>::iterator leaf = cladeLeaves.begin(); leaf != cladeLeaves.end(); ++leaf)
        {
            cladeLabels.push_back((*leaf)->GetLabel());
        }

        //...and polytomize it; the tree surgery itself waits until the iterator is closed
        polytomizedWithMapping = PolytomizeNAD(current, speciesTree, lcaMapping);
        nadNode = current;
        break;
    }
    geneTreeCopy->CloseIterator(walker);

    //no NAD anywhere : nothing was done, so the copy goes away and NULL is reported
    if (nadNode == NULL)
    {
        delete geneTreeCopy;

        PolySolverCorrectionInfo nothingDone;
        nothingDone.correction = NULL;
        return nothingDone;
    }

    Node* polytomized = polytomizedWithMapping.first;

    //swap the old subtree for its polytomized version
    if (nadNode->IsRoot())
    {
        delete geneTreeCopy;
        geneTreeCopy = polytomized;
    }
    else
    {
        Node* parent = nadNode->GetParent();
        parent->RemoveChild(nadNode);
        parent->AddSubTree(polytomized);
        delete nadNode;
    }

    PolySolverCorrectionInfo info;
    info.nadCladeGenes = cladeLabels;
    info.firstPolySize = polytomized->GetChild(0)->GetNbChildren();
    info.secondPolySize = polytomized->GetChild(1)->GetNbChildren();
    this->SolvePolytomy(polytomized->GetChild(0), speciesTree, polytomizedWithMapping.second);
    this->SolvePolytomy(polytomized->GetChild(1), speciesTree, polytomizedWithMapping.second);

    geneTreeCopy->DeleteSingleChildDescendants();

    info.correction = geneTreeCopy;
    return info;
}