/**
 * @brief Compute, for every branch and every site, the reward weighted by the
 * joint likelihood of the states at both ends of the branch.
 *
 * For each non-root node, the likelihood contribution of everything but the
 * branch of interest is first gathered at the father node ("constant part").
 * The reward matrix returned by reward.getAllRewards() is then averaged over
 * all rate classes and all (father state x, node state y) pairs, weighted by
 * constantPart[x] * P(x -> y) * conditional likelihood[y], and finally divided
 * by the likelihood of the site.
 *
 * @param drtl    The likelihood object to use. Must be initialized.
 * @param nodeIds Ids of the nodes to process. If empty, all non-root nodes are
 *                processed; otherwise nodes whose id is not listed are skipped.
 * @param reward  The reward object. Its substitution model is reset for each
 *                branch model encountered (side effect), and its address is
 *                handed to the returned mapping.
 * @param verbose If true, display a task message and a progress gauge.
 * @return A heap-allocated ProbabilisticRewardMapping; ownership passes to the caller.
 * @throw Exception If the likelihood object is not initialized.
 */
ProbabilisticRewardMapping* RewardMappingTools::computeRewardVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  Reward& reward,
  bool verbose) throw (Exception)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:
  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticRewardMapping object.
  // It is held by an auto_ptr until the very end, so that it is not leaked
  // if any of the computations below throws:
  auto_ptr<ProbabilisticRewardMapping> rewards(new ProbabilisticRewardMapping(tree, &reward, nbSites));

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the reward for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    Vdouble rewardsForCurrentNode(nbDistinctSites);

    // Now we've got to compute likelihoods in a smart manner... ;)
    // Every entry starts at the probability of its rate class; the
    // contributions of the other subtrees are multiplied in below.
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant (all the other sons of the father):
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition
            // (only once per partition, using its first site):
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }

    if (father->hasFather())
    {
      // The grandfather side is handled like one more neighbour of 'father'.
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

      // Now iterate over all site partitions:
      auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                // Note the swapped indices compared to the sons' loop above:
                // the matrix is read as [y][x] here.
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      reward.setSubstitutionModel(bmd->getModel());

      // compute all nxy first:
      VVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        // The matrix returned by getAllRewards() is deleted by the auto_ptr,
        // including when an exception is thrown while copying it:
        auto_ptr<Matrix<double> > nij(reward.getAllRewards(d * rc));
        nxy_c->resize(nbStates);
        for (size_t x = 0; x < nbStates; ++x)
        {
          Vdouble* nxy_c_x = &(*nxy_c)[x];
          nxy_c_x->resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            (*nxy_c_x)[y] = (*nij)(x, y);
          }
        }
      }

      // Now loop over sites:
      auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              // Now the vector computation:
              rewardsForCurrentNode[i] += likelihood_cxy * (*nxy_c)[x][y];
              //                          <------------>   <--------------->
              // Posterior probability          |                 |
              // for site i and rate class c *  |                 |
              // likelihood for this site-------+                 |
              //                                                  |
              // Reward function for site i and rate class c------+
            }
          }
        }
      }
    }

    // Now we just have to copy the rewards into the result vector,
    // expanding distinct site patterns back to all sites and dividing
    // by the likelihood of the corresponding pattern:
    for (size_t i = 0; i < nbSites; ++i)
    {
      (*rewards)(l, i) = rewardsForCurrentNode[(*rootPatternLinks)[i]] / Lr[(*rootPatternLinks)[i]];
    }
  }
  if (verbose)
  {
    if (ApplicationTools::message)
      *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }

  // Everything went fine: hand the mapping over to the caller.
  return rewards.release();
}
void TreeTemplateTools::midRoot(TreeTemplate<Node>& tree, short criterion, bool forceBranchRoot) { if (criterion != MIDROOT_VARIANCE && criterion != MIDROOT_SUM_OF_SQUARES) throw Exception("TreeTemplateTools::midRoot(). Illegal criterion value '" + TextTools::toString(criterion) + "'"); if (tree.isRooted()) tree.unroot(); Node* ref_root = tree.getRootNode(); // // The bestRoot object records : // -- the current best branch : .first // -- the current best value of the criterion : .second["value"] // -- the best position of the root on the branch : .second["position"] // 0 is toward the original root, 1 is away from it // pair<Node*, map<string, double> > best_root_branch; best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father best_root_branch.second ["position"] = -1; best_root_branch.second ["score"] = numeric_limits<double>::max(); // find the best root getBestRootInSubtree_(tree, criterion, ref_root, best_root_branch); tree.rootAt(ref_root); // back to the original root // reroot const double pos = best_root_branch.second["position"]; if (pos < 1e-6 or pos > 1 - 1e-6) // The best root position is on a node (this is often the case with the sum of squares criterion) tree.rootAt(pos < 1e-6 ? 
best_root_branch.first->getFather() : best_root_branch.first); else // The best root position is somewhere on a branch (a new Node is created) { Node* new_root = new Node(); new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) ); double root_branch_length = best_root_branch.first->getDistanceToFather(); Node* best_root_father = best_root_branch.first->getFather(); best_root_father->removeSon(best_root_branch.first); best_root_father->addSon(new_root); new_root->addSon(best_root_branch.first); new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6)); best_root_branch.first->setDistanceToFather(max((1 - pos) * root_branch_length, 1e-6)); // The two branches leaving the root must have the same branch properties const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames(); for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p) { new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p)); } tree.rootAt(new_root); } if (forceBranchRoot) // if we want the root to be on a branch, not on a node { Node* orig_root = tree.getRootNode(); vector<Node*> root_sons = orig_root->getSons(); if (root_sons.size() > 2) { Node* nearest = root_sons.at(0); for (vector<Node*>::iterator n = root_sons.begin(); n != root_sons.end(); ++n) { if ((**n).getDistanceToFather() < nearest->getDistanceToFather()) nearest = *n; } const double d = nearest->getDistanceToFather(); Node* new_root = new Node(); new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) ); orig_root->removeSon(nearest); orig_root->addSon(new_root); new_root->addSon(nearest); new_root->setDistanceToFather(d / 2.); nearest->setDistanceToFather(d / 2.); const vector<string> branch_properties = nearest->getBranchPropertyNames(); for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p) { new_root->setBranchProperty(*p, *nearest->getBranchProperty(*p)); } 
tree.rootAt(new_root); } } }