/**
 * Returns the likelihood's tree as a parenthesis (Newick-style) string in
 * which branches carry a support value stored under TreeTools::BOOTSTRAP.
 *
 * For every node that has a grandfather, the node's id is recorded against
 * its father's id: the first such child seen goes into `rearr1`, any later
 * one overwrites `rearr2`. For each recorded father the support is
 *
 *     1 / (1 + exp(-testNNI(rearr1)) + exp(-testNNI(rearr2)))
 *
 * and is attached to the branch identified by the father's id.
 * (The method name suggests this is the aBayes branch support; the exact
 * meaning of the value returned by testNNI is not visible here.)
 *
 * NOTE(review): if a father has only one qualifying child, `rearr2` keeps
 * whatever value a default-constructed nniIDs holds -- confirm that nniIDs
 * initialises its members.
 *
 * @return The tree string with trailing spaces, tabs, CR and LF removed.
 */
string Alignment::get_abayes_tree() {
    TreeTemplate<Node> annotated = TreeTemplate<Node>(likelihood->getTree());

    // Group the children of each internal, non-root father.
    std::map<int, nniIDs> candidates;
    for (auto& child : annotated.getNodes()) {
        // Only nodes with a grandfather take part.
        if (!child->hasFather() || !child->getFather()->hasFather()) {
            continue;
        }

        const int parentId = child->getFatherId();
        auto hit = candidates.find(parentId);
        if (hit != candidates.end()) {
            // Father already known: this child fills the second slot.
            hit->second.rearr2 = child->getId();
        } else {
            // First child seen for this father.
            candidates[parentId].rearr1 = child->getId();
        }
    }

    // Score both recorded rearrangements of every father and store the support.
    for (const auto& candidate : candidates) {
        const nniIDs& ids = candidate.second;
        double score1 = -likelihood->testNNI(ids.rearr1);
        double score2 = -likelihood->testNNI(ids.rearr2);
        bpp::Number<double> support = 1 / (1 + exp(score1) + exp(score2));
        annotated.setBranchProperty(candidate.first, TreeTools::BOOTSTRAP, support);
    }

    string newick = TreeTools::treeToParenthesis(annotated, true, TreeTools::BOOTSTRAP);

    // Strip trailing whitespace; when nothing but whitespace is present the
    // index wraps to 0 and the whole string is cleared.
    const string::size_type lastKept = newick.find_last_not_of(" \n\r\t");
    newick.erase(lastKept + 1);
    return newick;
}
/**
 * Computes a per-branch, per-site reward mapping from a double-recursive
 * tree likelihood.
 *
 * For every non-root node `l` (optionally restricted to `nodeIds`) and every
 * distinct site pattern `i`, the function accumulates over rate classes `c`,
 * father states `x` and node states `y`:
 *
 *     constantPart[i][c][x] * pxy[c][x][y] * L_node[i][c][y] * reward[c][x][y]
 *
 * where `constantPart` combines the rate-class probability, the conditional
 * likelihoods of all other neighbours of the father (its other sons and,
 * if any, its own father; root frequencies otherwise), `pxy` are the
 * transition probabilities on the branch, and `reward` is obtained from
 * `reward.getAllRewards(d * rate_c)` with `d` the branch length. The sum is
 * divided by the site likelihood `Lr[i]` computed at the root and copied to
 * every site that maps to pattern `i`.
 *
 * @param drtl    Likelihood object; must be initialized.
 * @param nodeIds If non-empty, only nodes whose id is in this vector are
 *                processed; entries for other nodes are not written here.
 * @param reward  Reward object; its substitution model is reset for each
 *                branch model encountered, and it is passed by address to
 *                the returned mapping.
 * @param verbose If true, progress is reported through ApplicationTools.
 * @return A mapping allocated with `new`; this function keeps no other
 *         reference to it, so the caller must delete it.
 * @throws Exception if `drtl` is not initialized.
 */
ProbabilisticRewardMapping* RewardMappingTools::computeRewardVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  Reward& reward,
  bool verbose) throw (Exception)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:
  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node (assumes getNodes() lists the root last).
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticRewardMapping object.
  // It is held by an auto_ptr until the very end so that it is freed if any
  // of the computations below throws.
  auto_ptr<ProbabilisticRewardMapping> rewards(new ProbabilisticRewardMapping(tree, &reward, nbSites));

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the reward for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (!nodeIds.empty() && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    Vdouble rewardsForCurrentNode(nbDistinctSites);

    // Now we've got to compute likelihoods in a smart manner... ;)
    // Start from the rate-class probability for every site/class/state.
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    // multiply in the contribution of every other son of the father.
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition
            // (fetched once, using the first site of the partition):
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }

    if (father->hasFather())
    {
      // Contribution coming from above the father. Note that here the
      // transition matrix is read as pxy[c][y][x]: 'y' is the state at the
      // grandfather and 'x' the state at the father.
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

      // Now iterate over all site partitions:
      auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      reward.setSubstitutionModel(bmd->getModel());

      // compute all nxy first:
      VVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        // The matrix returned by getAllRewards is owned here; the auto_ptr
        // frees it at the end of this iteration, even if a resize throws.
        auto_ptr<Matrix<double> > nij(reward.getAllRewards(d * rc));
        nxy_c->resize(nbStates);
        for (size_t x = 0; x < nbStates; ++x)
        {
          Vdouble* nxy_c_x = &(*nxy_c)[x];
          nxy_c_x->resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            (*nxy_c_x)[y] = (*nij)(x, y);
          }
        }
      }

      // Now loop over sites:
      auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              // Now the vector computation:
              rewardsForCurrentNode[i] += likelihood_cxy * (*nxy_c)[x][y];
              //                          <------------>   <--------------->
              // Posterior probability         |                 |
              // for site i and rate class c * |                 |
              // likelihood for this site------+                 |
              //                                                 |
              // Reward function for site i and rate class c-----+
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector,
    // expanding distinct site patterns back to sites and normalising by the
    // site likelihood:
    for (size_t i = 0; i < nbSites; ++i)
    {
      (*rewards)(l, i) = rewardsForCurrentNode[(*rootPatternLinks)[i]] / Lr[(*rootPatternLinks)[i]];
    }
  }
  if (verbose)
  {
    if (ApplicationTools::message)
      *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }

  // Everything succeeded: hand ownership over to the caller.
  return rewards.release();
}