void TreeTools::midpointRooting(Tree& tree) { throw Exception("TreeTools::midpointRooting(Tree). This function is deprecated, use TreeTemplateTools::midRoot instead!"); if (tree.isRooted()) tree.unroot(); DistanceMatrix* dist = getDistanceMatrix(tree); vector<size_t> pos = MatrixTools::whichMax(dist->asMatrix()); double dmid = (*dist)(pos[0], pos[1]) / 2; int id1 = tree.getLeafId(dist->getName(pos[0])); int id2 = tree.getLeafId(dist->getName(pos[1])); int rootId = tree.getRootId(); double d1 = getDistanceBetweenAnyTwoNodes(tree, id1, rootId); double d2 = getDistanceBetweenAnyTwoNodes(tree, id2, rootId); int current = d2 > d1 ? id2 : id1; delete dist; double l = tree.getDistanceToFather(current); double c = l; while (c < dmid) { current = tree.getFatherId(current); l = tree.getDistanceToFather(current); c += l; } tree.newOutGroup(current); int brother = tree.getSonsId(tree.getRootId())[1]; if (brother == current) brother = tree.getSonsId(tree.getRootId())[0]; tree.setDistanceToFather(current, l - (c - dmid)); tree.setDistanceToFather(brother, c - dmid); }
/**
 * @brief Estimate a distance matrix and reconstruct a tree from it, optionally
 * repeating the procedure after re-estimating numerical parameters.
 *
 * The additional parameters of the estimation method are reset first. When
 * @p param equals DISTANCEMETHOD_PAIRWISE, the independent parameters of the
 * substitution model and of the rate distribution (minus @p parametersToIgnore)
 * are registered as additional parameters of the estimation method.
 *
 * Each pass computes the matrix, hands it to the reconstruction method and
 * retrieves the resulting tree. Unless @p param equals
 * DISTANCEMETHOD_ITERATIONS the function returns after the first pass.
 * Otherwise a likelihood object is built on the current tree, its parameters
 * are optimized, and a new pass starts; passes stop as soon as the
 * Robinson-Foulds distance between two successive trees is zero.
 *
 * @param estimationMethod     Object used to compute the distance matrix.
 * @param reconstructionMethod Object used to build a tree from the matrix.
 * @param parametersToIgnore   Parameters whose names are removed from every
 *                             list of parameters to estimate.
 * @param optimizeBrLen        If false, branch length parameters are removed
 *                             from the list given to the optimizer.
 * @param param                Optimization mode; compared against
 *                             DISTANCEMETHOD_PAIRWISE and
 *                             DISTANCEMETHOD_ITERATIONS.
 * @param tolerance            Forwarded to optimizeNumericalParameters.
 * @param tlEvalMax            Forwarded to optimizeNumericalParameters.
 * @param profiler             Forwarded to optimizeNumericalParameters.
 * @param messenger            Forwarded to optimizeNumericalParameters.
 * @param verbose              Verbosity level; progress and results are
 *                             displayed only when it is greater than 0.
 * @return The last tree built. It is heap-allocated and not released here, so
 *         the caller is expected to delete it.
 * @throw Exception Presumably propagated from the estimation, reconstruction
 *        or optimization steps (none is thrown directly here).
 */
TreeTemplate<Node>* OptimizationTools::buildDistanceTree(
  DistanceEstimation& estimationMethod,
  AgglomerativeDistanceMethod& reconstructionMethod,
  const ParameterList& parametersToIgnore,
  bool optimizeBrLen,
  const std::string& param,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* profiler,
  OutputStream* messenger,
  unsigned int verbose) throw (Exception)
{
  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);
  if (param == DISTANCEMETHOD_PAIRWISE)
  {
    ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters();
    tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    tmp.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(tmp);
  }

  TreeTemplate<Node>* tree = NULL;
  TreeTemplate<Node>* previousTree = NULL;
  bool test = true;
  while (test)
  {
    // Compute matrix:
    if (verbose > 0)
      ApplicationTools::displayTask("Estimating distance matrix", true);
    estimationMethod.computeMatrix();
    DistanceMatrix* matrix = estimationMethod.getMatrix();
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    // Compute tree:
    if (matrix->size() == 2)
    {
      // Special case, there is only one possible tree: a root with the two
      // taxa as sons, each at half of the distance separating them. The
      // off-diagonal entry is used because the diagonal one is the distance
      // of a taxon to itself.
      const double halfDistance = (*matrix)(0, 1) / 2.;
      Node* n1 = new Node(0);
      Node* n2 = new Node(1, matrix->getName(0));
      n2->setDistanceToFather(halfDistance);
      Node* n3 = new Node(2, matrix->getName(1));
      n3->setDistanceToFather(halfDistance);
      n1->addSon(n2);
      n1->addSon(n3);
      tree = new TreeTemplate<Node>(n1);
      // The matrix is owned by this function on every path, including this one.
      delete matrix;
      break;
    }

    if (verbose > 0)
      ApplicationTools::displayTask("Building tree");
    reconstructionMethod.setDistanceMatrix(*matrix);
    reconstructionMethod.computeTree();
    previousTree = tree;
    delete matrix;
    tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    if (previousTree)
    {
      // The convergence test must run whatever the verbosity, otherwise a
      // silent run would neither stop nor release the previous tree.
      int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false);
      if (verbose > 0)
        ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf));
      // Keep iterating only while the topology is still changing.
      test = (rf != 0);
      delete previousTree;
      previousTree = NULL;
    }

    if (param != DISTANCEMETHOD_ITERATIONS)
      break; // Ends here.

    // Now, re-estimate parameters:
    // NOTE(review): the model and rate distribution optimized below are local
    // clones; nothing visible in this function copies the optimized values
    // back into estimationMethod -- confirm how the next pass sees them.
    auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone());
    auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone());
    DRHomogeneousTreeLikelihood tl(*tree, *estimationMethod.getData(), model.get(), rdist.get(), true, verbose > 1);
    tl.initialize();
    ParameterList parameters = tl.getParameters();
    if (!optimizeBrLen)
    {
      vector<string> vs = tl.getBranchLengthsParameters().getParameterNames();
      parameters.deleteParameters(vs);
    }
    parameters.deleteParameters(parametersToIgnore.getParameterNames());
    optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0);

    if (verbose > 0)
    {
      ParameterList tmp = tl.getSubstitutionModelParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
      tmp = tl.getRateDistributionParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
    }
  }
  return tree;
}
/**
 * @brief Build a tree by agglomerating a distance matrix and return it as a
 * Newick string.
 *
 * Starting from one leaf per matrix entry, the pair of nodes maximizing the
 * selection criterion is repeatedly merged under a new parent node, and the
 * distance and variance matrices are updated with a weight (lambda) derived
 * from the variances, until three nodes remain. Those are attached to a
 * common root (unrooted result). With exactly two entries the result is a
 * root with two sons, each at half the pairwise distance.
 *
 * Both matrices are taken by value because they are overwritten while the
 * tree is built.
 *
 * @param dists Pairwise distances; its size gives the number of leaves and
 *              its names give the leaf names.
 * @param vars  Variances used to weight the distance update; presumably the
 *              same dimension and ordering as @p dists (not checked here).
 * @return The tree written by the Newick writer, with trailing whitespace
 *         (space, tab, CR, LF) removed.
 * @throw Exception If fewer than two taxa are given, or if no pair with a
 *        finite criterion can be selected.
 */
string Alignment::_computeTree(DistanceMatrix dists, DistanceMatrix vars) throw (Exception)
{
  typedef std::map<size_t, Node*> NodeMap;

  const size_t nbTaxa = dists.size();
  // With fewer than two taxa the final step would walk past the end of the
  // node map, so reject such input up front.
  if (nbTaxa < 2)
    throw Exception("Alignment::_computeTree. At least two taxa are required to build a tree.");

  // Initialization: one leaf per taxon, keyed by its index in the matrices.
  NodeMap activeNodes;
  for (size_t i = 0; i < nbTaxa; ++i)
  {
    activeNodes[i] = new Node(static_cast<int>(i), dists.getName(i));
  }
  int nextNodeId = static_cast<int>(nbTaxa);

  std::vector<double> rowSums(nbTaxa);
  std::vector<double> newDist(nbTaxa);
  std::vector<double> newVar(nbTaxa);

  // Value used both as the starting maximum and as the "nothing found" marker.
  const double minusInfinity = std::log(0.);

  // Build tree:
  while (activeNodes.size() > 3)
  {
    // The map is only modified at the very end of the iteration, so this
    // count is valid for every formula below.
    const double nbOthers = static_cast<double>(activeNodes.size() - 2);

    // Sum of distances from each active node to all active nodes.
    for (NodeMap::iterator i = activeNodes.begin(); i != activeNodes.end(); ++i)
    {
      const size_t id = i->first;
      rowSums[id] = 0;
      for (NodeMap::iterator j = activeNodes.begin(); j != activeNodes.end(); ++j)
      {
        rowSums[id] += dists(id, j->first);
      }
    }

    // Get best pair: scan every unordered pair and keep the largest criterion.
    std::vector<size_t> bestPair(2);
    double critMax = minusInfinity;
    for (NodeMap::iterator i = activeNodes.begin(); i != activeNodes.end(); ++i)
    {
      const size_t id = i->first;
      NodeMap::iterator j = i;
      ++j;
      for (; j != activeNodes.end(); ++j)
      {
        const size_t jd = j->first;
        const double crit = rowSums[id] + rowSums[jd] - nbOthers * dists(id, jd);
        if (crit > critMax)
        {
          critMax = crit;
          bestPair[0] = id;
          bestPair[1] = jd;
        }
      }
    }
    // NOTE(review): nodes allocated so far are not released on this path.
    if (critMax == minusInfinity)
      throw Exception("Unexpected error: no maximum criterium found.");

    const size_t first = bestPair[0];
    const size_t second = bestPair[1];

    // Get branch lengths for pair, never shorter than MIN_BRANCH_LENGTH.
    const double ratio = (rowSums[first] - rowSums[second]) / nbOthers;
    std::vector<double> d(2);
    d[0] = std::max(.5 * (dists(first, second) + ratio), MIN_BRANCH_LENGTH);
    d[1] = std::max(.5 * (dists(first, second) - ratio), MIN_BRANCH_LENGTH);

    Node* best1 = activeNodes[first];
    Node* best2 = activeNodes[second];
    best1->setDistanceToFather(d[0]);
    best2->setDistanceToFather(d[1]);
    Node* parent = new Node(nextNodeId++);
    parent->addSon(best1);
    parent->addSon(best2);

    // Compute lambda, the weight given to the first member of the pair when
    // the matrices are updated; it is restricted to [0, 1].
    double lambda = .5;
    if (vars(first, second) != 0)
    {
      double sum = 0;
      for (NodeMap::iterator i = activeNodes.begin(); i != activeNodes.end(); ++i)
      {
        const size_t id = i->first;
        if (id != first && id != second)
          sum += (vars(second, id) - vars(first, id));
      }
      const double div = 2 * nbOthers * vars(first, second);
      lambda = sum / div;
      lambda += .5;
    }
    if (lambda < 0.)
      lambda = 0.;
    if (lambda > 1.)
      lambda = 1.;

    // Distances and variances from the new parent to every other node.
    for (NodeMap::iterator i = activeNodes.begin(); i != activeNodes.end(); ++i)
    {
      const size_t id = i->first;
      if (id != first && id != second)
      {
        newDist[id] = std::max(lambda * (dists(first, id) - d[0]) + (1 - lambda) * (dists(second, id) - d[1]), 0.);
        newVar[id] = lambda * vars(first, id) + (1 - lambda) * vars(second, id) - lambda * (1 - lambda) * vars(first, second);
      }
      else
      {
        newDist[id] = 0;
        // Reset as well, so that no value from an earlier round is written
        // onto the diagonal below.
        newVar[id] = 0;
      }
    }

    // Update active nodes: the parent takes over the slot of the first member
    // of the pair and the second member is dropped.
    activeNodes[first] = parent;
    activeNodes.erase(second);
    for (NodeMap::iterator i = activeNodes.begin(); i != activeNodes.end(); ++i)
    {
      const size_t id = i->first;
      dists(id, first) = newDist[id];
      dists(first, id) = newDist[id];
      vars(id, first) = newVar[id];
      vars(first, id) = newVar[id];
    }
  }

  // Final step: connect the two or three remaining nodes to a root.
  Node* root = new Node(nextNodeId);
  NodeMap::iterator it = activeNodes.begin();
  const size_t i1 = it->first;
  Node* n1 = it->second;
  ++it;
  const size_t i2 = it->first;
  Node* n2 = it->second;
  if (activeNodes.size() == 2)
  {
    // Rooted
    const double d = dists(i1, i2) / 2;
    root->addSon(n1);
    root->addSon(n2);
    n1->setDistanceToFather(d);
    n2->setDistanceToFather(d);
  }
  else
  {
    // Unrooted
    ++it;
    const size_t i3 = it->first;
    Node* n3 = it->second;
    const double d1 = std::max(dists(i1, i2) + dists(i1, i3) - dists(i2, i3), MIN_BRANCH_LENGTH);
    const double d2 = std::max(dists(i2, i1) + dists(i2, i3) - dists(i1, i3), MIN_BRANCH_LENGTH);
    const double d3 = std::max(dists(i3, i1) + dists(i3, i2) - dists(i1, i2), MIN_BRANCH_LENGTH);
    root->addSon(n1);
    root->addSon(n2);
    root->addSon(n3);
    n1->setDistanceToFather(d1 / 2.);
    n2->setDistanceToFather(d2 / 2.);
    n3->setDistanceToFather(d3 / 2.);
  }

  // Automatic storage: the tree is destroyed on every exit path, including
  // when the writer throws.
  TreeTemplate<Node> tree(root);
  Tree& treeRef = tree;
  std::stringstream ss;
  Newick treeWriter;
  treeWriter.write(treeRef, ss);

  // Strip trailing whitespace left by the writer.
  std::string s = ss.str();
  s.erase(s.find_last_not_of(" \n\r\t") + 1);
  return s;
}