Exemple #1
0
void Clustal::writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception)
{
  output << "CLUSTAL W (1.81) multiple sequence alignment" << endl;
  output << endl;
  if (sc.getNumberOfSequences() == 0)
    return;

  vector<string> text;
  size_t length = 0;
  for (size_t i = 0; i < sc.getNumberOfSequences(); ++i ) {
    const Sequence& seq = sc.getSequence(i);
    if (seq.getName().size() > length)
      length = seq.getName().size();
    text.push_back(sc.getSequence(i).toString());
  }
  length += nbSpacesBeforeSeq_;
  for (unsigned int j = 0; j < text[0].size(); j += charsByLine_) {
    for (unsigned int i = 0; i < sc.getNumberOfSequences(); ++i ) {
      output << TextTools::resizeRight(sc.getSequence(i).getName(), length);
      output << text[i].substr(j, charsByLine_) << endl;
    }
    output << endl;
  }
}
void SiteContainerTools::getSequencePositions(const SiteContainer& sites, Matrix<size_t>& positions)
{
  positions.resize(sites.getNumberOfSequences(), sites.getNumberOfSites());
  int gap = sites.getAlphabet()->getGapCharacterCode();
  for (size_t i = 0; i < sites.getNumberOfSequences(); ++i) {
    const Sequence& seq = sites.getSequence(i);
    unsigned int pos = 0;
    for (size_t j = 0; j < sites.getNumberOfSites(); ++j) {
      if (seq[j] != gap) {
        ++pos;
        positions(i, j) = pos;
      } else {
        positions(i, j) = 0;
      }
    }
  }
}
std::map<size_t, size_t> SiteContainerTools::translateSequence(const SiteContainer& sequences, size_t i1, size_t i2)
{
  const Sequence* seq1 = &sequences.getSequence(i1);
  const Sequence* seq2 = &sequences.getSequence(i2);
  map<size_t, size_t> tln;
  size_t count1 = 0; // Sequence 1 counter
  size_t count2 = 0; // Sequence 2 counter
  int state1;
  int state2;
  for (size_t i = 0; i <  sequences.getNumberOfSites(); i++)
  {
    state1 = (*seq1)[i];
    if (state1 != -1)
      count1++;
    state2 = (*seq2)[i];
    if (state2 != -1)
      count2++;
    if (state1 != -1)
    {
      tln[count1] = (state2 == -1 ? 0 : count2);
    }
  }
  return tln;
}
void RecursiveLikelihoodTree::initLikelihoodsWithoutPatterns_(const Node* node, const SiteContainer& sequences, const SubstitutionProcess& process) throw (Exception)
{
  int nId = node->getId();

  // Initialize likelihood vector:
  if (!node->hasFather())
  {
    resetAboveLikelihoods(nId, nbDistinctSites_, nbStates_);
    resetLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D0);

    resetLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D1);
    resetLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D2);
  }


  resetBelowLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D0);
  resetBelowLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D1);
  resetBelowLikelihoods(nId, nbDistinctSites_, nbStates_, ComputingNode::D2);


  // Now initialize likelihood values and pointers:

  if (node->hasNoSon())
  {
    const Sequence* seq;
    try
    {
      seq = &sequences.getSequence(node->getName());
    }
    catch (SequenceNotFoundException snfe)
    {
      throw SequenceNotFoundException("RecursiveLikelihoodTree::initTreelikelihoods. Leaf name in tree not found in site conainer: ", (node->getName()));
    }

    for (size_t c = 0; c < nbClasses_; c++)
    {
      RecursiveLikelihoodNode& lNode = *dynamic_cast<RecursiveLikelihoodNode*>(vTree_[c]->getNode(nId));
      VVdouble& array = lNode.getBelowLikelihoodArray_(ComputingNode::D0);

      for (size_t i = 0; i < nbDistinctSites_; i++)
      {
        Vdouble* array_i = &array[i];
        int state = seq->getValue(i);
        double test = 0.;
        for (size_t s = 0; s < nbStates_; s++)
        {
          double x = process.getInitValue(s, state);
          if (lNode.usesLog())
          {
            if (x <= 0)
              (*array_i)[s] = -10000;
            else
              (*array_i)[s] = log(x);
          }
          else
            (*array_i)[s] = x;

          test += x;
        }
        if (test < 0.000001)
          std::cerr << "WARNING!!! Likelihood will be 0 for this site " << TextTools::toString(i) << std::endl;
      }
      lNode.updateBelow_(true, ComputingNode::D0);
    }
  }
  else
  {
    // 'node' is an internal node.
    std::map<int, std::vector<size_t> >* patternLinks_node = &patternLinks_[nId];
    int nbSonNodes = static_cast<int>(node->getNumberOfSons());
    for (int l = 0; l < nbSonNodes; ++l)
    {
      // For each son node,
      const Node* son = (*node)[l];
      initLikelihoodsWithoutPatterns_(son, sequences, process);
      std::vector<size_t>* patternLinks_node_son = &(*patternLinks_node)[son->getId()];

      // Init map:
      patternLinks_node_son->resize(nbDistinctSites_);

      for (size_t i = 0; i < nbDistinctSites_; i++)
      {
        (*patternLinks_node_son)[i] = i;
      }
    }
  }

  if (!node->hasFather())
    setAboveLikelihoods(nId, process.getRootFrequencies());
}