Esempio n. 1
0
ProbabilisticRewardMapping* RewardMappingTools::computeRewardVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  Reward& reward,
  bool verbose) throw (Exception)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer*    sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites         = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates        = sequences->getAlphabet()->getSize();
  size_t nbClasses       = rDist->getNumberOfCategories();
  vector<const Node*> nodes    = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes         = nodes.size();

  // We create a new ProbabilisticRewardMapping object:
  ProbabilisticRewardMapping* rewards = new ProbabilisticRewardMapping(tree, &reward, nbSites);

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcProbs = rDist->getProbabilities();
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the reward for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    Vdouble rewardsForCurrentNode(nbDistinctSites);

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons =  father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      reward.setSubstitutionModel(bmd->getModel());
      // compute all nxy first:
      VVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        Matrix<double>* nij = reward.getAllRewards(d * rc);
        nxy_c->resize(nbStates);
        for (size_t x = 0; x < nbStates; ++x)
        {
          Vdouble* nxy_c_x = &(*nxy_c)[x];
          nxy_c_x->resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            (*nxy_c_x)[y] = (*nij)(x, y);
          }
        }
        delete nij;
      }

      // Now loop over sites:
      auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              // Now the vector computation:
              rewardsForCurrentNode[i] += likelihood_cxy * (*nxy_c)[x][y];
              //                       <------------>   <--------------->
              // Posterior probability         |                 |
              // for site i and rate class c * |                 |
              // likelihood for this site------+                 |
              //                                                 |
              // Reward function for site i and rate class c------+
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      (*rewards)(l, i) = rewardsForCurrentNode[(*rootPatternLinks)[i]] / Lr[(*rootPatternLinks)[i]];
    }
  }
  if (verbose)
  {
    if (ApplicationTools::message)
      *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }
  return rewards;
}
Esempio n. 2
0
void TreeTemplateTools::midRoot(TreeTemplate<Node>& tree, short criterion, bool forceBranchRoot)
{
  if (criterion != MIDROOT_VARIANCE && criterion != MIDROOT_SUM_OF_SQUARES)
    throw Exception("TreeTemplateTools::midRoot(). Illegal criterion value '" + TextTools::toString(criterion) + "'");

  if (tree.isRooted())
    tree.unroot();
  Node* ref_root = tree.getRootNode();
  //
  // The bestRoot object records :
  // -- the current best branch : .first
  // -- the current best value of the criterion : .second["value"]
  // -- the best position of the root on the branch : .second["position"]
  //      0 is toward the original root, 1 is away from it
  //
  pair<Node*, map<string, double> > best_root_branch;
  best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father
  best_root_branch.second ["position"] = -1;
  best_root_branch.second ["score"] = numeric_limits<double>::max();

  // find the best root
  getBestRootInSubtree_(tree, criterion, ref_root, best_root_branch);
  tree.rootAt(ref_root); // back to the original root

  // reroot
  const double pos = best_root_branch.second["position"];
  if (pos < 1e-6 or pos > 1 - 1e-6)
    // The best root position is on a node (this is often the case with the sum of squares criterion)
    tree.rootAt(pos < 1e-6 ? best_root_branch.first->getFather() : best_root_branch.first);
  else
  // The best root position is somewhere on a branch (a new Node is created)
  {
    Node* new_root = new Node();
    new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );

    double root_branch_length = best_root_branch.first->getDistanceToFather();
    Node* best_root_father = best_root_branch.first->getFather();

    best_root_father->removeSon(best_root_branch.first);
    best_root_father->addSon(new_root);
    new_root->addSon(best_root_branch.first);

    new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6));
    best_root_branch.first->setDistanceToFather(max((1 - pos) * root_branch_length, 1e-6));

    // The two branches leaving the root must have the same branch properties
    const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames();
    for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
    {
      new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p));
    }

    tree.rootAt(new_root);
  }

  if (forceBranchRoot) // if we want the root to be on a branch, not on a node
  {
    Node* orig_root = tree.getRootNode();
    vector<Node*> root_sons = orig_root->getSons();
    if (root_sons.size() > 2)
    {
      Node* nearest = root_sons.at(0);
      for (vector<Node*>::iterator n = root_sons.begin(); n !=
           root_sons.end(); ++n)
      {
        if ((**n).getDistanceToFather() < nearest->getDistanceToFather())
          nearest = *n;
      }
      const double d = nearest->getDistanceToFather();
      Node* new_root = new Node();
      new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );
      orig_root->removeSon(nearest);
      orig_root->addSon(new_root);
      new_root->addSon(nearest);
      new_root->setDistanceToFather(d / 2.);
      nearest->setDistanceToFather(d / 2.);
      const vector<string> branch_properties = nearest->getBranchPropertyNames();
      for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
      {
        new_root->setBranchProperty(*p, *nearest->getBranchProperty(*p));
      }
      tree.rootAt(new_root);
    }
  }
}