Esempio n. 1
0
/**
 * Annotate a copy of the likelihood's tree with a per-branch support value
 * and return it as a parenthesis string.
 *
 * For every node that has both a father and a grandfather, the node id is
 * recorded under its father's id: the first such child goes to `rearr1`,
 * any later one overwrites `rearr2`. For each recorded father, testNNI() is
 * called on both ids, the results are negated, and the support is
 * 1 / (1 + exp(lnl1) + exp(lnl2)), stored as the BOOTSTRAP branch property
 * of the father.
 *
 * NOTE(review): presumably testNNI() returns a likelihood score difference
 * for the alternative topology; confirm its sign convention.
 *
 * @return The annotated tree string, with trailing whitespace removed.
 */
string Alignment::get_abayes_tree() {
    TreeTemplate<Node> tree = TreeTemplate<Node>(likelihood->getTree());
    std::map<int, nniIDs> nniMap;

    for (auto& node : tree.getNodes()) {
        // Only nodes with a grandfather are considered.
        if (!node->hasFather() || !node->getFather()->hasFather()) {
            continue;
        }
        const int fatherId = node->getFatherId();
        auto search = nniMap.find(fatherId);
        if (search == nniMap.end()) {
            // First child seen for this father.
            nniMap[fatherId].rearr1 = node->getId();
        }
        else {
            search->second.rearr2 = node->getId();
        }
    }

    // Iterate by const reference: the entries are only read, so copying each
    // pair on every iteration is unnecessary.
    for (const auto& entry : nniMap) {
        double lnl1 = -likelihood->testNNI(entry.second.rearr1);
        double lnl2 = -likelihood->testNNI(entry.second.rearr2);
        bpp::Number<double> abayes = 1 / (1 + exp(lnl1) + exp(lnl2));
        tree.setBranchProperty(entry.first, TreeTools::BOOTSTRAP, abayes);
    }

    string s = TreeTools::treeToParenthesis(tree, true, TreeTools::BOOTSTRAP);
    // Drop the trailing whitespace/newline of the serialized tree.
    s.erase(s.find_last_not_of(" \n\r\t")+1);
    return s;
}
int main() {
  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.001, B:0.002):0.003,C:0.01,D:0.1);");
  cout << tree->getNumberOfLeaves() << endl;
  vector<int> ids = tree->getNodesId();
  //-------------

  NucleicAlphabet* alphabet = new DNA();
  SubstitutionModel* model = new GTR(alphabet, 1, 0.2, 0.3, 0.4, 0.4, 0.1, 0.35, 0.35, 0.2);
  //DiscreteDistribution* rdist = new GammaDiscreteDistribution(4, 0.4, 0.4);
  DiscreteDistribution* rdist = new ConstantDistribution(1.0);
  HomogeneousSequenceSimulator simulator(model, rdist, tree);

  unsigned int n = 100000;
  map<int, RowMatrix<unsigned int> > counts;
  for (size_t j = 0; j < ids.size() - 1; ++j) //ignore root, the last id
    counts[ids[j]].resize(4, 4);
  for (unsigned int i = 0; i < n; ++i) {
    RASiteSimulationResult* result = simulator.dSimulate();
    for (size_t j = 0; j < ids.size() - 1; ++j) { //ignore root, the last id
      result->getMutationPath(ids[j]).getEventCounts(counts[ids[j]]);
    }
    delete result;
  }
  map<int, RowMatrix<double> >freqs;
  map<int, double> sums;
  for (size_t k = 0; k < ids.size() - 1; ++k) { //ignore root, the last id
    RowMatrix<double>* freqsP = &freqs[ids[k]];
    RowMatrix<unsigned int>* countsP = &counts[ids[k]];
    freqsP->resize(4, 4);
    for (unsigned int i = 0; i < 4; ++i)
      for (unsigned int j = 0; j < 4; ++j)
        (*freqsP)(i, j) = static_cast<double>((*countsP)(i, j)) / (static_cast<double>(n));
    
    //For now we simply compare the total number of substitutions:
    sums[ids[k]] = MatrixTools::sumElements(*freqsP);
  
    cout << "Br" << ids[k] << " BrLen = " << tree->getDistanceToFather(ids[k]) << " counts = " << sums[ids[k]] << endl;
    MatrixTools::print(*freqsP);
  }
  //We should compare this matrix with the expected one!

  for (size_t k = 0; k < ids.size() - 1; ++k) { //ignore root, the last id
    if (abs(sums[ids[k]] - tree->getDistanceToFather(ids[k])) > 0.01) {
      delete tree;
      delete alphabet;
      delete model;
      delete rdist;
      return 1;
    }
  }
  //-------------
  delete tree;
  delete alphabet;
  delete model;
  delete rdist;

  //return (abs(obs - 0.001) < 0.001 ? 0 : 1);
  return 0;
}
Esempio n. 3
0
/**
 * Serialize a tree to a parenthesis string.
 *
 * The output is "(" + comma-separated description of the root's sons + ")",
 * followed by ":<distance>" when the root has a distance to its father, then
 * ";" and an end of line.
 *
 * @param tree    The tree to serialize.
 * @param writeId Forwarded unchanged to nodeToParenthesis() for every son.
 * @return The string representation of the tree.
 */
string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool writeId)
{
  const Node* root = tree.getRootNode();
  ostringstream out;
  out << "(";

  // The first element is either the root's own name (root without sons) or
  // its first son; every remaining son is appended after a comma.
  size_t nextSon = 0;
  if (root->hasNoSon())
  {
    out << root->getName();
    // NOTE(review): the loop below still starts at son 0 in this case, as in
    // the original; presumably it never iterates when there is no son.
  }
  else
  {
    out << nodeToParenthesis(*root->getSon(0), writeId);
    nextSon = 1;
  }
  for (size_t i = nextSon; i < root->getNumberOfSons(); ++i)
  {
    out << "," << nodeToParenthesis(*root->getSon(i), writeId);
  }

  out << ")";
  if (root->hasDistanceToFather())
  {
    out << ":" << root->getDistanceToFather();
  }
  out << ";" << endl;
  return out.str();
}
Esempio n. 4
0
/**
 * Compute the radius of a tree.
 *
 * The tree is first re-rooted in place with midRoot() using the
 * sum-of-squares criterion (without forcing the root onto a branch); the
 * radius is then the `sum` moment of the root subtree divided by its number
 * of leaves.
 *
 * @param tree The tree to measure. It is modified (re-rooted) by this call.
 * @return The radius.
 */
double TreeTemplateTools::getRadius(TreeTemplate<Node>& tree)
{
  TreeTemplateTools::midRoot(tree, MIDROOT_SUM_OF_SQUARES, false);
  const Moments_ rootMoments = getSubtreeMoments_(tree.getRootNode());
  return rootMoments.sum / rootMoments.numberOfLeaves;
}
Esempio n. 5
0
/**
 * Build a tree from its parenthesis description.
 *
 * Everything up to (but excluding) the last ';' of the description is parsed
 * with parenthesisToNode() and the resulting node becomes the root of a newly
 * allocated tree. Node ids are reset unless `withId` is true.
 *
 * @param description  The tree description; must contain a ';'.
 * @param bootstrap    Forwarded to parenthesisToNode().
 * @param propertyName Forwarded to parenthesisToNode().
 * @param withId       Forwarded to parenthesisToNode(); when false, the node
 *                     ids of the new tree are reset.
 * @param verbose      Forwarded to parenthesisToNode(); when true, a message
 *                     is also written to ApplicationTools::message.
 * @return A newly allocated tree (allocated with `new` here).
 * @throw Exception If the description contains no semi-colon.
 */
TreeTemplate<Node>* TreeTemplateTools::parenthesisToTree(const string& description, bool bootstrap, const string& propertyName, bool withId, bool verbose) throw (Exception)
{
  const string::size_type lastSemiColon = description.rfind(';');
  if (lastSemiColon == string::npos)
  {
    throw Exception("TreeTemplateTools::parenthesisToTree(). Bad format: no semi-colon found.");
  }

  // Parse the part that precedes the final semi-colon.
  string newick = description.substr(0, lastSemiColon);
  unsigned int nodeCounter = 0;
  Node* root = parenthesisToNode(newick, nodeCounter, bootstrap, propertyName, withId, verbose);

  TreeTemplate<Node>* result = new TreeTemplate<Node>();
  result->setRootNode(root);
  if (!withId)
    result->resetNodesId();

  if (verbose)
  {
    (*ApplicationTools::message) << " nodes loaded.";
    ApplicationTools::message->endLine();
  }
  return result;
}
/**
 * Build the per-class likelihood trees for a substitution process.
 *
 * The process tree is converted into a tree of RecursiveLikelihoodNode, and
 * one clone of that tree is appended to vTree_ for each of the nbClasses_
 * classes. The intermediate prototype tree is deleted afterwards.
 *
 * @param process     The substitution process providing the tree.
 * @param usepatterns Stored in usePatterns_.
 */
RecursiveLikelihoodTree::RecursiveLikelihoodTree(const SubstitutionProcess& process, bool usepatterns) :
  AbstractLikelihoodTree(process),
  vTree_(),
  patternLinks_(),
  usePatterns_(usepatterns),
  initializedAboveLikelihoods_(false)
{
  TreeTemplate<Node> sourceTree = process.getParametrizableTree().getTree();

  // Prototype tree whose nodes are RecursiveLikelihoodNode copies of the source nodes.
  RecursiveLikelihoodNode* clonedRoot = TreeTemplateTools::cloneSubtree<RecursiveLikelihoodNode>(*sourceTree.getRootNode());
  TreeTemplate<RecursiveLikelihoodNode>* prototype = new TreeTemplate<RecursiveLikelihoodNode>(clonedRoot);

  // One independent copy of the prototype per class.
  for (size_t classIndex = 0; classIndex < nbClasses_; ++classIndex)
  {
    vTree_.push_back(prototype->clone());
  }

  delete prototype;
}
Esempio n. 7
0
// Builds a tree instance from a template: allocates one Node per template
// node, copies each node's transform, name and batch, and converts the
// template's first-child / next-sibling links into a per-node child array.
Tree::Tree( const TreeTemplate& tmpl ) :
mPosition( 0.0, 0.0, 0.0 ),
mAngle( 0.0, 0.0, 0.0 ),
mScale( 1.0, 1.0, 1.0 ),
mNodes( 0 ),
mNodeNumber( 0 ),
mTime( 0.0 ){
	mNodeNumber = tmpl.nodeNumber();
	mNodes = new Node[ mNodeNumber ];
	for ( int index = 0; index < mNodeNumber; ++index ){
		const NodeTemplate* source = tmpl.node( index );
		Node& target = mNodes[ index ];

		// Transfer the parameters.
		target.setTranslation( *source->translation() );
		target.setRotation( *source->rotation() );
		target.setScale( *source->scale() );
		target.setName( source->name()->c_str() );
		target.setBatch( source->batch() );

		// [Conversion from first-child / sibling form to child-array form]
		// Count the children by walking the sibling chain.
		int childCount = 0;
		for ( int c = source->child(); c >= 0; c = tmpl.node( c )->brother() ){
			++childCount;
		}
		// Allocate the child slots.
		target.setChildNumber( childCount );
		// Fill the slots in sibling-chain order.
		int slot = 0;
		for ( int c = source->child(); c >= 0; c = tmpl.node( c )->brother() ){
			target.setChild( slot, mNodes + c );
			++slot;
		}
	}
}
Esempio n. 8
0
/**
 * Build a random topology over the given leaf names.
 *
 * A pool is initialised with one leaf node per name. While the pool holds
 * more than 2 (rooted) or 3 (unrooted) nodes, two nodes are drawn at random,
 * removed from the pool and replaced by a new parent node having them as
 * sons. The remaining nodes become the sons of a new root node, and the node
 * ids of the resulting tree are reset.
 *
 * @param leavesNames Names of the leaves.
 * @param rooted      Selects the maximum number of sons at the root (2 or 3).
 * @return A newly allocated tree, or 0 when there is no leaf name.
 */
TreeTemplate<Node>* TreeTemplateTools::getRandomTree(vector<string>& leavesNames, bool rooted)
{
  const size_t nbLeaves = leavesNames.size();
  if (nbLeaves == 0)
    return 0; // No taxa.

  // Pool of subtrees still to be grouped: starts with all the leaves and,
  // at the end, holds only the sons of the root.
  vector<Node*> pool;
  pool.reserve(nbLeaves);
  for (size_t i = 0; i < nbLeaves; ++i)
  {
    pool.push_back(new Node(leavesNames[i]));
  }

  // Group the subtrees randomly, two by two.
  const size_t maxRootDegree = rooted ? 2 : 3;
  while (pool.size() > maxRootDegree)
  {
    // Draw the two subtrees one after the other, removing each from the pool
    // as soon as it is picked.
    Node* picked[2];
    for (int k = 0; k < 2; ++k)
    {
      const size_t pos = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(pool.size());
      picked[k] = pool[pos];
      pool.erase(pool.begin() + static_cast<ptrdiff_t>(pos));
    }

    Node* parent = new Node();
    parent->addSon(picked[0]);
    parent->addSon(picked[1]);
    pool.push_back(parent);
  }

  // Attach whatever is left to a new root node.
  Node* root = new Node();
  for (vector<Node*>::iterator it = pool.begin(); it != pool.end(); ++it)
  {
    root->addSon(*it);
  }

  TreeTemplate<Node>* tree = new TreeTemplate<Node>(root);
  tree->resetNodesId();
  return tree;
}
// Test driver: fits a T92 model with a 4-class discrete gamma rate
// distribution on a small 4-sequence alignment, first with fitModelH() and
// then with fitModelHClock(). Returns 0 on success and 1 if either call
// throws an Exception (the message is printed to cerr).
//
// NOTE(review): the two numeric arguments passed to each fit function are
// presumably the expected initial and final likelihood values; confirm
// against the definitions of fitModelH()/fitModelHClock().
int main() {
  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.01, B:0.01):0.02,C:0.03):0.01,D:0.04);");
  //-------------

  const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET;
  SubstitutionModel* model = new T92(alphabet, 3.);
  DiscreteDistribution* rdist = new GammaDiscreteDistribution(4, 1.0);
  rdist->aliasParameters("alpha", "beta");

  VectorSiteContainer sites(alphabet);
  sites.addSequence(BasicSequence("A", "AAATGGCTGTGCACGTC", alphabet));
  sites.addSequence(BasicSequence("B", "AACTGGATCTGCATGTC", alphabet));
  sites.addSequence(BasicSequence("C", "ATCTGGACGTGCACGTG", alphabet));
  sites.addSequence(BasicSequence("D", "CAACGGGAGTGCGCCTA", alphabet));

  // Both fits share one handler: a failure in the first one skips the second,
  // exactly as the previous early return did, but the heap objects are now
  // released on the failure path as well.
  int status = 0;
  try {
    fitModelH(model, rdist, *tree, sites, 93.017264552603336369, 71.265543199977557265);
    fitModelHClock(model, rdist, *tree, sites, 92.27912072473920090943, 71.26554020984087856050);
  } catch (Exception& ex) {
    cerr << ex.what() << endl;
    status = 1;
  }

  //-------------
  delete tree;
  delete model;
  delete rdist;

  return status;
}
Esempio n. 10
0
/**
 * Serialize a tree to a parenthesis string, writing a branch property for
 * the root after the closing parenthesis.
 *
 * The output is "(" + comma-separated description of the root's sons + ")",
 * followed by the root's property (if it has one), then ";" and an end of
 * line.
 *
 * @param tree         The tree to serialize.
 * @param bootstrap    If true, the TreeTools::BOOTSTRAP property is written
 *                     and must be a Number<double>; otherwise the property
 *                     named `propertyName` is written and must be a BppString.
 * @param propertyName Name of the property to write when `bootstrap` is false.
 * @return The string representation of the tree.
 * @throw Exception If the selected property exists on the root but does not
 *                  have the expected type.
 */
string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool bootstrap, const string& propertyName)
{
  ostringstream s;
  s << "(";
  const Node* node = tree.getRootNode();
  if (node->hasNoSon())
  {
    s << node->getName();
    // NOTE(review): presumably this loop never iterates when the root has no
    // son; kept as in the original.
    for (size_t i = 0; i < node->getNumberOfSons(); i++)
    {
      s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName);
    }
  }
  else
  {
    s << nodeToParenthesis(*node->getSon(0), bootstrap, propertyName);
    for (size_t i = 1; i < node->getNumberOfSons(); i++)
    {
      s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName);
    }
  }
  s << ")";
  if (bootstrap)
  {
    if (node->hasBranchProperty(TreeTools::BOOTSTRAP))
    {
      // Check the cast before dereferencing, as done for BppString below: a
      // property of another type would otherwise dereference a null pointer.
      const Number<double>* support = dynamic_cast<const Number<double>*>(node->getBranchProperty(TreeTools::BOOTSTRAP));
      if (support)
        s << support->getValue();
      else
        throw Exception("TreeTemplateTools::treeToParenthesis. Bootstrap property should be a Number<double>.");
    }
  }
  else
  {
    if (node->hasBranchProperty(propertyName))
    {
      const BppString* ppt = dynamic_cast<const BppString*>(node->getBranchProperty(propertyName));
      if (ppt)
        s << *ppt;
      else
        throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
    }
  }
  s << ";" << endl;
  return s.str();
}
Esempio n. 11
0
/**
 * Read all trees of the first TREES block found in a Nexus stream.
 *
 * Lines are skipped until "BEGIN TREES;" (case-insensitive) is found. An
 * optional TRANSLATE command is parsed into a name -> translation table, then
 * every following TREE command is parsed ("tree-name=tree-description") and
 * the resulting tree is appended to `trees`, until an END command or the end
 * of the commands. When a translation table is present, every leaf name is
 * replaced by its translation.
 *
 * @param in    The input stream.
 * @param trees Output vector; each tree is allocated here and appended.
 *              Trees appended before an error are left in the vector.
 * @throw IOException If the stream is not readable.
 * @throw Exception   If no TREES block or tree command is found, if the
 *                    TRANSLATE or TREE syntax is invalid, if an unexpected
 *                    command is met, or if a leaf has no translation.
 */
void NexusIOTree::read(std::istream& in, std::vector<Tree*>& trees) const throw (Exception)
{
  // Checking the existence of specified file
  if (! in) { throw IOException ("NexusIOTree::read(). Failed to read from stream"); }

  //Look for the TREES block:
  string line = "";
  while (TextTools::toUpper(line) != "BEGIN TREES;")
  {
    if (in.eof())
      throw Exception("NexusIOTree::read(). No trees block was found.");
    line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in));
  }

  string cmdName = "", cmdArgs = "";
  bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
  if (! cmdFound)
    throw Exception("NexusIOTree::read(). Missing tree command.");
  cmdName = TextTools::toUpper(cmdName);

  //Look for the TRANSLATE command:
  map<string, string> translation;
  bool hasTranslation = false;
  if (cmdName == "TRANSLATE")
  {
    //Parse translation: comma-separated "name translation" pairs.
    StringTokenizer st(cmdArgs, ",");
    while (st.hasMoreToken())
    {
      string tok = TextTools::removeSurroundingWhiteSpaces(st.nextToken());
      NestedStringTokenizer nst(tok, "'", "'", " \t");
      if (nst.numberOfRemainingTokens() != 2)
        throw Exception("NexusIOTree::read(). Unvalid translation description.");
      string name = nst.nextToken();
      string tln  = nst.nextToken();
      translation[name] = tln;
    }
    hasTranslation = true;
    cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
    if (! cmdFound)
      throw Exception("NexusIOTree::read(). Missing tree command.");
    else
      cmdName = TextTools::toUpper(cmdName);
  }

  //Now parse the trees:
  while (cmdFound && cmdName != "END")
  {
    if (cmdName != "TREE")
      throw Exception("NexusIOTree::read(). Unvalid command found: " + cmdName);
    string::size_type pos = cmdArgs.find("=");
    if (pos == string::npos)
      throw Exception("NexusIOTree::read(). unvalid format, should be tree-name=tree-description");
    string description = cmdArgs.substr(pos + 1);
    TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree(description + ";", true);

    //Now translate leaf names if there is a translation:
    //(we assume that all trees share the same translation! ===> check!)
    if (hasTranslation)
    {
      vector<Node*> leaves = tree->getLeaves();
      for (size_t i = 0; i < leaves.size(); i++)
      {
        string name = leaves[i]->getName();
        // Single lookup: the iterator is reused to fetch the translation.
        map<string, string>::const_iterator entry = translation.find(name);
        if (entry == translation.end())
        {
          // The tree is not yet stored in 'trees': release it before
          // throwing, otherwise it would be leaked.
          delete tree;
          throw Exception("NexusIOTree::read(). No translation was given for this leaf: " + name);
        }
        leaves[i]->setName(entry->second);
      }
    }
    trees.push_back(tree);
    cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false);
    if (cmdFound) cmdName = TextTools::toUpper(cmdName);
  }
}
Esempio n. 12
0
/**
 * Re-root a tree in place at the position that minimizes the given criterion.
 *
 * The tree is unrooted if needed, getBestRootInSubtree_() is run from the
 * current root node to find the best branch and the best position on it, and
 * the tree is then re-rooted either on an existing node (position within 1e-6
 * of 0 or 1) or on a new node inserted on the best branch.
 *
 * @param tree            The tree to re-root (modified).
 * @param criterion       MIDROOT_VARIANCE or MIDROOT_SUM_OF_SQUARES.
 * @param forceBranchRoot If true and the resulting root has more than two
 *                        sons, a new root is inserted in the middle of the
 *                        shortest branch leaving the root.
 * @throw Exception If criterion is not one of the two accepted values.
 */
void TreeTemplateTools::midRoot(TreeTemplate<Node>& tree, short criterion, bool forceBranchRoot)
{
  if (criterion != MIDROOT_VARIANCE && criterion != MIDROOT_SUM_OF_SQUARES)
    throw Exception("TreeTemplateTools::midRoot(). Illegal criterion value '" + TextTools::toString(criterion) + "'");

  if (tree.isRooted())
    tree.unroot();
  Node* ref_root = tree.getRootNode();
  //
  // The bestRoot object records :
  // -- the current best branch : .first
  // -- the current best value of the criterion : .second["score"]
  // -- the best position of the root on the branch : .second["position"]
  //      0 is toward the original root, 1 is away from it
  //
  pair<Node*, map<string, double> > best_root_branch;
  best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father
  best_root_branch.second ["position"] = -1;
  best_root_branch.second ["score"] = numeric_limits<double>::max();

  // find the best root
  getBestRootInSubtree_(tree, criterion, ref_root, best_root_branch);
  tree.rootAt(ref_root); // back to the original root

  // reroot
  // NOTE(review): if getBestRootInSubtree_() leaves best_root_branch untouched
  // (position still -1, first still ref_root), the test below is true and
  // getFather() is called on ref_root, which per the comment above has no
  // father -- confirm that the search always updates the record.
  const double pos = best_root_branch.second["position"];
  if (pos < 1e-6 or pos > 1 - 1e-6)
    // The best root position is on a node (this is often the case with the sum of squares criterion)
    tree.rootAt(pos < 1e-6 ? best_root_branch.first->getFather() : best_root_branch.first);
  else
  // The best root position is somewhere on a branch (a new Node is created)
  {
    Node* new_root = new Node();
    new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );

    double root_branch_length = best_root_branch.first->getDistanceToFather();
    Node* best_root_father = best_root_branch.first->getFather();

    // Insert new_root between the best node and its father.
    best_root_father->removeSon(best_root_branch.first);
    best_root_father->addSon(new_root);
    new_root->addSon(best_root_branch.first);

    // Split the branch length according to pos; each part is at least 1e-6.
    new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6));
    best_root_branch.first->setDistanceToFather(max((1 - pos) * root_branch_length, 1e-6));

    // The two branches leaving the root must have the same branch properties
    const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames();
    for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
    {
      new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p));
    }

    tree.rootAt(new_root);
  }

  if (forceBranchRoot) // if we want the root to be on a branch, not on a node
  {
    Node* orig_root = tree.getRootNode();
    vector<Node*> root_sons = orig_root->getSons();
    if (root_sons.size() > 2)
    {
      // Find the son with the shortest branch to the root.
      Node* nearest = root_sons.at(0);
      for (vector<Node*>::iterator n = root_sons.begin(); n !=
           root_sons.end(); ++n)
      {
        if ((**n).getDistanceToFather() < nearest->getDistanceToFather())
          nearest = *n;
      }
      // Insert a new root in the middle of that branch.
      const double d = nearest->getDistanceToFather();
      Node* new_root = new Node();
      new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );
      orig_root->removeSon(nearest);
      orig_root->addSon(new_root);
      new_root->addSon(nearest);
      new_root->setDistanceToFather(d / 2.);
      nearest->setDistanceToFather(d / 2.);
      // Copy the branch properties so both halves of the branch carry them.
      const vector<string> branch_properties = nearest->getBranchPropertyNames();
      for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
      {
        new_root->setBranchProperty(*p, *nearest->getBranchProperty(*p));
      }
      tree.rootAt(new_root);
    }
  }
}
Esempio n. 13
0
// Simulation test comparing parameter estimation between two likelihood
// implementations under a non-homogeneous T92 model.
//
// For each of nrep replicates, an alignment of nsites sites is simulated with
// random per-model theta values, then the parameters are optimized with two
// RNonHomogeneousTreeLikelihood objects ("OldTL") and two
// SingleProcessPhyloLikelihood objects ("NewTL"). The theta estimates are
// averaged over the replicates; the test returns 1 if any averaged estimate
// differs from the simulated value by more than 0.2, and 0 otherwise.
//
// NOTE(review): none of the objects allocated with `new` in this function are
// deleted here; whether some are owned by the objects they are passed to
// cannot be told from this function alone.
int main() {

  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.1, B:0.2):0.3,C:0.1):0.2,(D:0.3,(E:0.2,F:0.05):0.1):0.1);");

  vector<string> seqNames= tree->getLeavesNames();
  vector<int> ids = tree->getNodesId();
  //-------------

  const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET;
  FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet);
  
  SubstitutionModel* model = new T92(alphabet, 3.);
  std::vector<std::string> globalParameterNames;
  globalParameterNames.push_back("T92.kappa");

  //Very difficult to optimize on small datasets:
  DiscreteDistribution* rdist = new GammaDiscreteRateDistribution(4, 1.0);
  
  // Independent copies of the tree, root frequencies, rate distribution and
  // model, used to build the substitution process below.
  ParametrizableTree* parTree = new ParametrizableTree(*tree);
  FrequenciesSet* rootFreqs2 = rootFreqs->clone();
  DiscreteDistribution* rdist2 = rdist->clone();
  SubstitutionModel* model2=model->clone();

  map<string, string> alias;

  // modelSet is optimized through tl below; modelSetSim is a clone used only for simulation.
  SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames);
  unique_ptr<SubstitutionModelSet> modelSetSim(modelSet->clone());

  NonHomogeneousSubstitutionProcess* subPro= NonHomogeneousSubstitutionProcess::createNonHomogeneousSubstitutionProcess(model2, rdist2, rootFreqs2, parTree, globalParameterNames);

  // Simulation
    
  size_t nsites = 1000;
  unsigned int nrep = 20;
  size_t nmodels = modelSet->getNumberOfModels();
  // thetas holds the simulated values; the thetasEst* vectors accumulate the
  // estimates of each likelihood object over the replicates.
  vector<double> thetas(nmodels);
  vector<double> thetasEst1(nmodels);
  vector<double> thetasEst2(nmodels);
  vector<double> thetasEst1n(nmodels);
  vector<double> thetasEst2n(nmodels);

  // Draw one theta per model and set it on the simulation model set.
  for (size_t i = 0; i < nmodels; ++i) {
    double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005;
    cout << "Theta" << i << " set to " << theta << endl; 
    modelSetSim->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    //subPro->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    thetas[i] = theta;
  }

  NonHomogeneousSequenceSimulator simulator(modelSetSim.get(), rdist, tree);

  NonHomogeneousSubstitutionProcess* subPro2 = subPro->clone();

  for (unsigned int j = 0; j < nrep; j++) {

    // NOTE(review): a new pair of output streams is allocated on every
    // replicate and not deleted in this function -- confirm ownership.
    OutputStream* profiler  = new StlOutputStream(new ofstream("profile.txt", ios::out));
    OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out));

    //Simulate data:
    unique_ptr<SiteContainer> sites(simulator.simulate(nsites));

    //Now fit model:
    unique_ptr<SubstitutionModelSet> modelSet2(modelSet->clone());

    // "Old" likelihood objects; they differ only in their last constructor argument.
    RNonHomogeneousTreeLikelihood tl(*tree, *sites.get(), modelSet, rdist, true, true, false);
    tl.initialize();

    RNonHomogeneousTreeLikelihood tl2(*tree, *sites.get(), modelSet2.get(), rdist, true, true, true);
    tl2.initialize();

    // "New" likelihood objects, each built on a fresh clone of a substitution process.
    SubstitutionProcess* nsubPro=subPro->clone();
    SubstitutionProcess* nsubPro2=subPro2->clone();
    
    RecursiveLikelihoodTreeCalculation* tlComp = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro, true, false);
    SingleProcessPhyloLikelihood ntl(nsubPro, tlComp, true);

    RecursiveLikelihoodTreeCalculation* tlComp2 = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro2, true);
    SingleProcessPhyloLikelihood ntl2(nsubPro2, tlComp2, true);

    // Set the simulated theta values on the new likelihood objects.
    for (size_t i = 0; i < nmodels; ++i) {
      ntl.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]);
      ntl2.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]);
    }

    cout << setprecision(10) << "OldTL init: "  << tl.getValue() << "\t" << tl2.getValue() << endl;
    cout << setprecision(10) << "NewTL init: "  << ntl.getValue() << "\t" << ntl2.getValue() << endl;

    // Optimize all four likelihood objects with the same settings; the
    // returned values are printed next to the final likelihoods below.
    unsigned int c1 = OptimizationTools::optimizeNumericalParameters2(
      &tl, tl.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);
    
    
    unsigned int c2 = OptimizationTools::optimizeNumericalParameters2(
      &tl2, tl2.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    unsigned int nc1 = OptimizationTools::optimizeNumericalParameters2(
      &ntl, ntl.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    unsigned int nc2 = OptimizationTools::optimizeNumericalParameters2(
      &ntl2, ntl2.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    cout << "OldTL: " << c1 << ": " << tl.getValue() << "\t" << c2 << ": " << tl2.getValue() << endl;
    cout << "NewTL: " << nc1 << ": " << ntl.getValue() << "\t" << nc2 << ": " << ntl2.getValue() << endl;

    cout << "Thetas : " << endl;

    // Accumulate the theta values read back from each model after optimization.
    for (size_t i = 0; i < nmodels; ++i) {
      //    cerr << modelSet->getModel(i)->getParameter("theta").getValue() << "\t" << modelSet2->getModel(i)->getParameter("theta").getValue();
      
      //      cerr << "\t"  << subPro->getModel(i)->getParameter("theta").getValue() << "\t" << subPro2->getModel(i)->getParameter("theta").getValue() << endl;
      // if (abs(modelSet2->getModel(i)->getParameter("theta").getValue() - modelSet3->getModel(i)->getParameter("theta").getValue()) > 0.1)
      //   return 1;
      thetasEst1[i] +=  modelSet->getModel(i)->getParameter("theta").getValue();
      thetasEst2[i] +=  modelSet2->getModel(i)->getParameter("theta").getValue();
      thetasEst1n[i] +=  dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro)->getModel(i)->getParameter("theta").getValue();
      thetasEst2n[i] +=  dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro2)->getModel(i)->getParameter("theta").getValue();
    }
  }
  // Average the accumulated estimates over the replicates.
  // NOTE(review): relies on an operator/= for vector<double> declared outside this block.
  thetasEst1 /= static_cast<double>(nrep);
  thetasEst2 /= static_cast<double>(nrep);
  thetasEst1n /= static_cast<double>(nrep);
  thetasEst2n /= static_cast<double>(nrep);

  //Now compare estimated values to real ones:
  cout << "Real" << "\t" << "Est_Old1" << "\t" << "Est_Old2" << "\t";
  cout << "Est_New1" << "\t" << "Est_New2" << endl;
  for (size_t i = 0; i < thetas.size(); ++i) {
    cout << thetas[i] << "\t" << thetasEst1[i] << "\t" << thetasEst2[i] << "\t";
    cout << thetasEst1n[i] << "\t" << thetasEst2n[i] << endl;
     double diff1 = abs(thetas[i] - thetasEst1[i]);
     double diff2 = abs(thetas[i] - thetasEst2[i]);
     double diffn1 = abs(thetas[i] - thetasEst1n[i]);
     double diffn2 = abs(thetas[i] - thetasEst2n[i]);
     // Fail if any averaged estimate is more than 0.2 away from the simulated value.
     if (diff1 > 0.2 || diff2 > 0.2 || diffn1 > 0.2 || diffn2 > 0.2)
       return 1;
  }

  return 0;
}
Esempio n. 14
0
// Simulation test for substitution mapping under a codon model (YN98).
//
// n codon sites are simulated along a fixed 4-leaf tree. For every site and
// every non-root branch, the simulated substitution count is recorded, both
// raw and through a total and a dN/dS substitution register; the test returns
// 1 immediately if the total register disagrees with the raw count. A
// likelihood object is then built on the simulated alignment and substitution
// vectors are computed with several counting methods. The per-branch
// comparison of those vectors against the simulated counts is currently
// commented out, so the function otherwise returns 0.
//
// NOTE(review): gc, totReg, dndsReg, sCountAna, sCountUniTot and
// sCountUniDnDs are not deleted in this function, and the early `return 1`
// skips all cleanup; whether the registers are owned by the objects they are
// passed to cannot be told from this function alone.
int main() {
  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.001, B:0.002):0.008,C:0.01,D:0.1);");
  vector<int> ids = tree->getNodesId();
  ids.pop_back(); //Ignore root

  //-------------

  CodonAlphabet* alphabet = new CodonAlphabet(&AlphabetTools::DNA_ALPHABET);
  GeneticCode* gc = new StandardGeneticCode(&AlphabetTools::DNA_ALPHABET);
  CodonSubstitutionModel* model = new YN98(gc, CodonFrequenciesSet::getFrequenciesSetForCodons(CodonFrequenciesSet::F0, gc));
  //SubstitutionModel* model = new CodonRateSubstitutionModel(
  //      gc,
  //      new JCnuc(dynamic_cast<CodonAlphabet*>(alphabet)->getNucleicAlphabet()));
  cout << model->getNumberOfStates() << endl;
  MatrixTools::printForR(model->getGenerator(), "g");
  DiscreteDistribution* rdist = new ConstantDistribution(1.0);
  HomogeneousSequenceSimulator simulator(model, rdist, tree);
  TotalSubstitutionRegister* totReg = new TotalSubstitutionRegister(model);
  DnDsSubstitutionRegister* dndsReg = new DnDsSubstitutionRegister(model);

  unsigned int n = 20000;
  // Simulated counts, indexed [site][branch] (and [type] for the register-based maps).
  vector< vector<double> > realMap(n);
  vector< vector< vector<double> > > realMapTotal(n);
  vector< vector< vector<double> > > realMapDnDs(n);
  VectorSiteContainer sites(tree->getLeavesNames(), alphabet);
  for (unsigned int i = 0; i < n; ++i) {
    ApplicationTools::displayGauge(i, n-1, '=');
    RASiteSimulationResult* result = simulator.dSimulateSite();
    realMap[i].resize(ids.size());
    realMapTotal[i].resize(ids.size());
    realMapDnDs[i].resize(ids.size());
    for (size_t j = 0; j < ids.size(); ++j) {
      realMap[i][j] = static_cast<double>(result->getSubstitutionCount(ids[j]));
      realMapTotal[i][j].resize(totReg->getNumberOfSubstitutionTypes());
      realMapDnDs[i][j].resize(dndsReg->getNumberOfSubstitutionTypes());
      result->getSubstitutionCount(ids[j], *totReg, realMapTotal[i][j]);
      result->getSubstitutionCount(ids[j], *dndsReg, realMapDnDs[i][j]);
      // Consistency check: the total register must give the raw count.
      if (realMapTotal[i][j][0] != realMap[i][j]) {
        cerr << "Error, total substitution register provides wrong result." << endl;
        return 1;
      }
      //if (abs(VectorTools::sum(realMapDetailed[i][j]) - realMap[i][j]) > 0.000001) {
      //  cerr << "Error, detailed substitution register provides wrong result." << endl;
      //  return 1;
      //}
    }
    // Store the simulated site in the alignment at position i.
    auto_ptr<Site> site(result->getSite(*model));
    site->setPosition(static_cast<int>(i));
    sites.addSite(*site, false);
    delete result;
  }
  ApplicationTools::displayTaskDone();
  
  //-------------
  //Now build the substitution vectors with the true model:
  //Fasta fasta;
  //fasta.write("Simulations.fasta", sites);
  DRHomogeneousTreeLikelihood drhtl(*tree, sites, model, rdist);
  drhtl.initialize();
  cout << drhtl.getValue() << endl;
 
  // For each counting method: print the count matrix returned for the
  // arguments (0.001, type), then compute the substitution vectors on all
  // non-root branches.
  SubstitutionCount* sCountAna = new LaplaceSubstitutionCount(model, 10);
  Matrix<double>* m = sCountAna->getAllNumbersOfSubstitutions(0.001,1);
  cout << "Analytical total count:" << endl;
  MatrixTools::print(*m);
  delete m;
  ProbabilisticSubstitutionMapping* probMapAna = 
    SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountAna);

  SubstitutionCount* sCountTot = new NaiveSubstitutionCount(model, totReg);
  m = sCountTot->getAllNumbersOfSubstitutions(0.001,1);
  cout << "Simple total count:" << endl;
  MatrixTools::print(*m);
  delete m;
  ProbabilisticSubstitutionMapping* probMapTot = 
    SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountTot);

  SubstitutionCount* sCountDnDs = new NaiveSubstitutionCount(model, dndsReg);
  m = sCountDnDs->getAllNumbersOfSubstitutions(0.001,1);
  cout << "Detailed count, type 1:" << endl;
  MatrixTools::print(*m);
  delete m;
  ProbabilisticSubstitutionMapping* probMapDnDs = 
    SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountDnDs);

  SubstitutionCount* sCountUniTot = new UniformizationSubstitutionCount(model, totReg);
  m = sCountUniTot->getAllNumbersOfSubstitutions(0.001,1);
  cout << "Total count, uniformization method:" << endl;
  MatrixTools::print(*m);
  delete m;
  ProbabilisticSubstitutionMapping* probMapUniTot = 
    SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountUniTot);

  SubstitutionCount* sCountUniDnDs = new UniformizationSubstitutionCount(model, dndsReg);
  m = sCountUniDnDs->getAllNumbersOfSubstitutions(0.001,2);
  cout << "Detailed count, uniformization method, type 2:" << endl;
  MatrixTools::print(*m);
  delete m;
  ProbabilisticSubstitutionMapping* probMapUniDnDs = 
    SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountUniDnDs);

  //Check per branch:
  // (Disabled: the block below refers to names such as probMapDecTot and
  // realMapDetailed that are not declared in this function.)
  
  /*
  //1. Total:
  for (unsigned int j = 0; j < ids.size(); ++j) {
    double totalReal = 0;
    double totalObs1 = 0;
    double totalObs2 = 0;
    double totalObs3 = 0;
    double totalObs4 = 0;
    double totalObs5 = 0;
    for (unsigned int i = 0; i < n; ++i) {
      totalReal += realMap[i][j];
      totalObs1 += probMapAna->getNumberOfSubstitutions(ids[j], i, 0);
      totalObs2 += probMapTot->getNumberOfSubstitutions(ids[j], i, 0);
      //totalObs3 += VectorTools::sum(probMapDet->getNumberOfSubstitutions(ids[j], i));
      totalObs4 += probMapDecTot->getNumberOfSubstitutions(ids[j], i, 0);
      //totalObs5 += VectorTools::sum(probMapDecDet->getNumberOfSubstitutions(ids[j], i));
    }
    if (tree->isLeaf(ids[j])) cout << tree->getNodeName(ids[j]) << "\t";
    cout << tree->getDistanceToFather(ids[j]) << "\t" << totalReal << "\t" << totalObs1 << "\t" << totalObs2 << "\t" << totalObs3 << "\t" << totalObs4 << "\t" << totalObs5 << endl;
    if (abs(totalReal - totalObs1) / totalReal > 0.1) return 1;
    if (abs(totalReal - totalObs2) / totalReal > 0.1) return 1;
    if (abs(totalReal - totalObs3) / totalReal > 0.1) return 1;
    if (abs(totalReal - totalObs4) / totalReal > 0.1) return 1;
  }
  //2. Detail:
  for (unsigned int j = 0; j < ids.size(); ++j) {
    vector<double> real(4, 0);
    vector<double> obs1(4, 0);
    vector<double> obs2(4, 0);
    for (unsigned int i = 0; i < n; ++i) {
      real += realMapDetailed[i][j];
      //VectorTools::print(real);
      //vector<double> c = probMapDet->getNumberOfSubstitutions(ids[j], i);
      //VectorTools::print(c);
      //obs1 += probMapDet->getNumberOfSubstitutions(ids[j], i);
      //obs2 += probMapDecDet->getNumberOfSubstitutions(ids[j], i);
    }
    if (tree->isLeaf(ids[j])) cout << tree->getNodeName(ids[j]) << "\t";
    cout << tree->getDistanceToFather(ids[j]) << "\t";
    for (unsigned int t = 0; t < 4; ++t) {
      cout << obs1[t] << "/" << real[t] << "\t";
      cout << obs2[t] << "/" << real[t] << "\t";
    }
    cout << endl;
    //if (abs(totalReal - totalObs) / totalReal > 0.1) return 1;
  }
  */

  //-------------
  delete tree;
  delete alphabet;
  delete model;
  delete rdist;
  delete sCountTot;
  delete sCountDnDs;
  delete probMapAna;
  delete probMapTot;
  delete probMapDnDs;
  delete probMapUniTot;
  delete probMapUniDnDs;
  //return (abs(obs - 0.001) < 0.001 ? 0 : 1);
  return 0;
}
Esempio n. 15
0
/**
 * Recursive helper of the mid-point rooting search.
 *
 * For every branch going from 'node' down to one of its sons, the criterion
 * is expressed as a quadratic A x^2 + B x + C of the root position x along
 * that branch, minimized over [0, 1], and compared with the best score
 * recorded so far in 'bestRoot'. The function then recurses into each son.
 *
 * @param tree      Tree being explored; it is re-rooted repeatedly while the search runs.
 * @param criterion MIDROOT_SUM_OF_SQUARES or MIDROOT_VARIANCE. With any other
 *                  value the coefficients stay at 0 and the branch gets the
 *                  worst possible score.
 * @param node      Upper node of the branches examined by this call.
 * @param bestRoot  In/out: best branch so far (identified by its lower node)
 *                  together with its "position" and "score" entries.
 */
void TreeTemplateTools::getBestRootInSubtree_(TreeTemplate<Node>& tree, short criterion, Node* node, pair<Node*, map<string, double> >& bestRoot)
{
  // Work on a copy of the son list (presumably because rootAt() rearranges
  // the father/son links while we iterate).
  const vector<Node*> children = node->getSons();
  tree.rootAt(node);

  for (size_t k = 0; k < children.size(); ++k)
  {
    Node* const child = children[k];

    // Moments of the subtree located on the child's side of the branch...
    Moments_ childSide = getSubtreeMoments_(child);

    // ...and of the subtree on the node's side: root at the child for the
    // time of the computation, then restore the rooting at 'node'.
    tree.rootAt(child);
    Moments_ nodeSide = getSubtreeMoments_(node);
    tree.rootAt(node);

    const double len = child->getDistanceToFather();
    const double n1 = nodeSide.numberOfLeaves;
    const double n2 = childSide.numberOfLeaves;

    // Coefficients of the criterion, seen as quad * x^2 + lin * x + cst.
    double quad = 0, lin = 0, cst = 0;
    if (criterion == MIDROOT_SUM_OF_SQUARES)
    {
      quad = (n1 + n2) * len * len;
      lin = 2 * len * (nodeSide.sum - childSide.sum) - 2 * n2 * len * len;
      cst = nodeSide.squaresSum + childSide.squaresSum
            + 2 * childSide.sum * len
            + n2 * len * len;
    }
    else if (criterion == MIDROOT_VARIANCE)
    {
      quad = 4 * n1 * n2 * len * len;
      lin = 4 * len * ( n2 * nodeSide.sum - n1 * childSide.sum - len * n1 * n2);
      cst = (n1 + n2) * (nodeSide.squaresSum + childSide.squaresSum) + n1 * len * n2 * len
            + 2 * n1 * len * childSide.sum - 2 * n2 * len * nodeSide.sum
            - (nodeSide.sum + childSide.sum) * (nodeSide.sum + childSide.sum);
    }

    // Defaults cover the degenerate case (no usable quadratic term):
    // middle of the branch, with a score that can never win.
    double position = 0.5; // 0 is toward the root, 1 is away from it
    double score = numeric_limits<double>::max();
    if (!(quad < 1e-20))
    {
      position = -lin / (2 * quad);
      if (position < 0)
      {
        // Vertex lies before the branch start: clamp to x = 0.
        position = 0;
        score = cst;
      }
      else if (position > 1)
      {
        // Vertex lies beyond the branch end: clamp to x = 1.
        position = 1;
        score = quad + lin + cst;
      }
      else
      {
        // Vertex is on the branch: value of the quadratic at its minimum.
        score = cst - lin * lin / (4 * quad);
      }
    }

    // Keep this branch if it beats the best one seen so far.
    if (score < bestRoot.second["score"])
    {
      bestRoot.first = child;
      bestRoot.second["position"] = position;
      bestRoot.second["score"] = score;
    }

    // Explore the branches below this son.
    getBestRootInSubtree_(tree, criterion, child, bestRoot);
  }
}
Esempio n. 16
0
/**
 * Builds a tree from this list of bipartitions.
 *
 * A working copy of the list is normalized (each bipartition flipped so that
 * its partition size is at most half of the elements, sorted by partition
 * size, redundant entries removed). One node is then created per remaining
 * bipartition: a named leaf for partitions of size 1, otherwise an internal
 * node adopting every still-fatherless node whose bit pattern is included in
 * its own. A final root node joins whatever remains fatherless.
 *
 * @return A newly allocated tree; the caller takes ownership.
 * @throw Exception if the bipartitions are not all compatible.
 */
TreeTemplate<Node>* BipartitionList::toTree() const throw (Exception)
{
  BipartitionList* sortedBipL;
  vector<int*> sortedBitBipL;
  vector<Node*> vecNd, sonNd;
  size_t lword, nbword, nbint, ii;

  /* check, copy and prepare bipartition list */

  if (!BipartitionList::areAllCompatible())
    throw Exception("Trying to build a tree from incompatible bipartitions");

  sortedBipL = dynamic_cast<BipartitionList*>(clone());
  for (size_t i = 0; i < sortedBipL->getNumberOfBipartitions(); i++)
  {
    if (sortedBipL->getPartitionSize(i) > sortedBipL->getNumberOfElements() / 2)
      sortedBipL->flip(i);
  }
  sortedBipL->sortByPartitionSize();
  sortedBipL->removeRedundantBipartitions();
  sortedBitBipL = sortedBipL->getBitBipartitionList();

  // The list is not modified any more from here on, so its size can be cached.
  const size_t nbBip = sortedBipL->getNumberOfBipartitions();

  // alive[j] is true as long as node j has not been given a father.
  vector<bool> alive(nbBip, true);
  vecNd.resize(nbBip + 1);
  lword  = static_cast<size_t>(BipartitionTools::LWORD);
  nbword = (elements_.size() + lword - 1) / lword;
  nbint  = nbword * lword / (CHAR_BIT * sizeof(int));

  // One-word scratch buffer receiving the result of bitOr(). It lives on the
  // stack: the previous heap-allocated int[1] was never released.
  int orWord = 0;

  /* main loop: create one node per bipartition */
  for (size_t i = 0; i < nbBip; i++)
  {
    if (sortedBipL->getPartitionSize(i) == 1)
    { // terminal: the leaf is named after the first element whose bit is set
      for (size_t j = 0; j < sortedBipL->getNumberOfElements(); j++)
      {
        if (BipartitionTools::testBit(sortedBitBipL[i], static_cast<int>(j)))
        {
          vecNd[i] = new Node(elements_[j]);
          break;
        }
      }
    }
    else
    { // internal
      sonNd.clear();
      for (size_t j = 0; j < i; j++)
      {
        if (alive[j])
        {
          // j is a son of i when OR-ing j into i leaves every word of i unchanged.
          for (ii = 0; ii < nbint; ii++)
          {
            BipartitionTools::bitOr(&orWord, sortedBitBipL[j] + ii, sortedBitBipL[i] + ii, 1);
            if (orWord != sortedBitBipL[i][ii])
              break;
          }
          if (ii == nbint)
          {
            sonNd.push_back(vecNd[j]);
            alive[j] = false;
          }
        }
      }
      vecNd[i] = new Node();
      for (size_t k = 0; k < sonNd.size(); k++)
      {
        vecNd[i]->addSon(sonNd[k]);
      }
    }
  }

  /* create last node, which joins alive bipartitions = fatherless nodes */
  Node* rootNd = new Node();
  for (size_t i = 0; i < nbBip; i++)
  {
    if (alive[i])
      rootNd->addSon(vecNd[i]);
  }

  /* construct tree and return */
  TreeTemplate<Node>* tr = new TreeTemplate<Node>(rootNd);
  tr->resetNodesId();
  delete sortedBipL;
  return tr;
}
Esempio n. 17
0
/**
 * Public entry point: runs computeNormProperties_() starting from the root
 * node of 'tree', using the given substitution mapping.
 *
 * @param tree    Tree whose root node is handed to the recursive helper.
 * @param mapping Substitution mapping forwarded unchanged to the helper.
 */
void ClusterTools::computeNormProperties(TreeTemplate<Node>& tree, const ProbabilisticSubstitutionMapping & mapping)
{
  // Third argument required by the helper; this wrapper never reads it back.
  double unusedMinNorm;
  computeNormProperties_(tree.getRootNode(), mapping, unusedMinNorm);
}
Esempio n. 18
0
/**
 * Computes, for each branch and each site, a reward value normalized by the
 * site likelihood, and stores the results in a new ProbabilisticRewardMapping.
 *
 * For every non-root node the function combines:
 *  - a "constant part" for the father (rate class probability, contributions
 *    of the node's siblings, and either the contribution of the father's own
 *    father or the root frequencies),
 *  - the transition probabilities on the branch leading to the node,
 *  - the likelihood array stored for (father, node),
 *  - the reward matrix returned by reward.getAllRewards(d * rate).
 *
 * @param drtl    Likelihood object; must be initialized.
 * @param nodeIds If non-empty, only nodes whose id is in this list are
 *                processed; entries of the other nodes are not written here.
 * @param reward  Reward object; its substitution model is reset for each
 *                branch model description encountered.
 * @param verbose If true, a task message and a progress gauge are displayed.
 * @return A newly allocated mapping (created with new and returned as a raw pointer).
 * @throw Exception if the likelihood object is not initialized.
 */
ProbabilisticRewardMapping* RewardMappingTools::computeRewardVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  Reward& reward,
  bool verbose) throw (Exception)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer*    sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites         = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates        = sequences->getAlphabet()->getSize();
  size_t nbClasses       = rDist->getNumberOfCategories();
  vector<const Node*> nodes    = tree.getNodes();
  // Maps each site index to the index of its distinct pattern (used when
  // copying the per-pattern results back to per-site results at the end).
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  // NOTE(review): this relies on getNodes() returning the root as the last
  // element -- confirm against TreeTemplate::getNodes().
  nodes.pop_back(); // Remove root node.
  size_t nbNodes         = nodes.size();

  // We create a new ProbabilisticRewardMapping object:
  ProbabilisticRewardMapping* rewards = new ProbabilisticRewardMapping(tree, &reward, nbSites);

  // Store likelihood for each rate for each site:
  // Lr[i] accumulates, over all classes c and states s, lik[i][c][s] weighted
  // by the probability of class c.
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  // NOTE(review): rcProbs is never read in this function (class probabilities
  // are fetched through rDist->getProbability(c) instead).
  Vdouble rcProbs = rDist->getProbabilities();
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the reward for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    // An empty nodeIds list means "process every node".
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    // One accumulator per distinct site pattern.
    Vdouble rewardsForCurrentNode(nbDistinctSites);

    // Now we've got to compute likelihoods in a smart manner... ;)
    // Initialize the father's constant part with the rate class probability
    // for every site pattern, class and state.
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    // Multiply in the contribution of every sibling of the current node.
    size_t nbSons =  father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          // 'first' makes sure pxy is fetched only once per branch model
          // description, using the first site returned by the iterator.
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      // The father is not the root: multiply in the contribution coming from
      // the father's own father (named currentSon here, as in the loop above).
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                // Unlike the sibling loop above, the matrix is read as
                // pxy[c][y][x] here (indices swapped).
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      reward.setSubstitutionModel(bmd->getModel());
      // compute all nxy first:
      // nxy[c][x][y] is a copy of the reward matrix obtained for branch
      // length d scaled by the rate of class c.
      VVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        Matrix<double>* nij = reward.getAllRewards(d * rc);
        nxy_c->resize(nbStates);
        for (size_t x = 0; x < nbStates; ++x)
        {
          Vdouble* nxy_c_x = &(*nxy_c)[x];
          nxy_c_x->resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            (*nxy_c_x)[y] = (*nij)(x, y);
          }
        }
        // The matrix returned by getAllRewards() is released here once copied.
        delete nij;
      }

      // Now loop over sites:
      auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              // Now the vector computation:
              rewardsForCurrentNode[i] += likelihood_cxy * (*nxy_c)[x][y];
              //                       <------------>   <--------------->
              // Posterior probability         |                 |
              // for site i and rate class c * |                 |
              // likelihood for this site------+                 |
              //                                                 |
              // Reward function for site i and rate class c------+
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    // The row is addressed by l, the position of the node in 'nodes' (not its
    // id); each site takes the value of its pattern divided by that pattern's
    // likelihood Lr.
    for (size_t i = 0; i < nbSites; ++i)
    {
      (*rewards)(l, i) = rewardsForCurrentNode[(*rootPatternLinks)[i]] / Lr[(*rootPatternLinks)[i]];
    }
  }
  if (verbose)
  {
    if (ApplicationTools::message)
      *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }
  return rewards;
}
Esempio n. 19
0
/**
 * Test program: simulates data under a non-homogeneous T92 model set with a
 * random theta per model, re-estimates the parameters with two
 * RNonHomogeneousTreeLikelihood instances that differ only in the last
 * constructor flag, and checks that the thetas averaged over all replicates
 * are within 0.2 of the simulated values.
 *
 * @return 0 on success, 1 if any averaged estimate is off by more than 0.2.
 */
int main() {
  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.1, B:0.2):0.3,C:0.1):0.2,(D:0.3,(E:0.2,F:0.05):0.1):0.1);");
  vector<string> seqNames= tree->getLeavesNames();
  vector<int> ids = tree->getNodesId();
  //-------------

  const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET;
  FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet);
  SubstitutionModel* model = new T92(alphabet, 3.);
  std::vector<std::string> globalParameterNames;
  globalParameterNames.push_back("T92.kappa");
  map<string, string> alias;

  SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames);
  //DiscreteDistribution* rdist = new ConstantDistribution(1.0, true);
  //Very difficult to optimize on small datasets:
  DiscreteDistribution* rdist = new GammaDiscreteRateDistribution(4, 1.0);

  size_t nsites = 1000;
  unsigned int nrep = 20;
  size_t nmodels = modelSet->getNumberOfModels();
  vector<double> thetas(nmodels);
  vector<double> thetasEst1(nmodels);
  vector<double> thetasEst2(nmodels);

  // Draw one random theta per model and remember it for the final comparison.
  for (size_t i = 0; i < nmodels; ++i) {
    double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005;
    cout << "Theta" << i << " set to " << theta << endl;
    modelSet->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    thetas[i] = theta;
  }
  NonHomogeneousSequenceSimulator simulator(modelSet, rdist, tree);

  for (unsigned int j = 0; j < nrep; j++) {

    // The output streams are owned by smart pointers so that they are
    // released (and their files closed) at the end of every replicate; they
    // used to be allocated with new on each iteration and never deleted.
    auto_ptr<OutputStream> profiler(new StlOutputStream(new ofstream("profile.txt", ios::out)));
    auto_ptr<OutputStream> messenger(new StlOutputStream(new ofstream("messages.txt", ios::out)));

    //Simulate data:
    auto_ptr<SiteContainer> sites(simulator.simulate(nsites));
    //Now fit model:
    auto_ptr<SubstitutionModelSet> modelSet2(modelSet->clone());
    auto_ptr<SubstitutionModelSet> modelSet3(modelSet->clone());
    RNonHomogeneousTreeLikelihood tl(*tree, *sites.get(), modelSet2.get(), rdist, true, true, false);
    tl.initialize();
    RNonHomogeneousTreeLikelihood tl2(*tree, *sites.get(), modelSet3.get(), rdist, true, true, true);
    tl2.initialize();

    unsigned int c1 = OptimizationTools::optimizeNumericalParameters2(
        &tl, tl.getParameters(), 0,
        0.0001, 10000, messenger.get(), profiler.get(), false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    unsigned int c2 = OptimizationTools::optimizeNumericalParameters2(
        &tl2, tl2.getParameters(), 0,
        0.0001, 10000, messenger.get(), profiler.get(), false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    cout << c1 << ": " << tl.getValue() << "\t" << c2 << ": " << tl2.getValue() << endl;

    // Accumulate the estimates; they are averaged after the replicate loop.
    for (size_t i = 0; i < nmodels; ++i) {
      cout << modelSet2->getModel(i)->getParameter("theta").getValue() << "\t" << modelSet3->getModel(i)->getParameter("theta").getValue() << endl;
      //if (abs(modelSet2->getModel(i)->getParameter("theta").getValue() - modelSet3->getModel(i)->getParameter("theta").getValue()) > 0.1)
      //  return 1;
      thetasEst1[i] +=  modelSet2->getModel(i)->getParameter("theta").getValue();
      thetasEst2[i] +=  modelSet3->getModel(i)->getParameter("theta").getValue();
    }
  }
  thetasEst1 /= static_cast<double>(nrep);
  thetasEst2 /= static_cast<double>(nrep);

  //Now compare estimated values to real ones:
  // The verdict is stored instead of returned right away so that the cleanup
  // below also runs when the check fails.
  int status = 0;
  for (size_t i = 0; i < thetas.size(); ++i) {
     cout << thetas[i] << "\t" << thetasEst1[i] << "\t" << thetasEst2[i] << endl;
     double diff1 = abs(thetas[i] - thetasEst1[i]);
     double diff2 = abs(thetas[i] - thetasEst2[i]);
     if (diff1 > 0.2 || diff2 > 0.2) {
        status = 1;
        break;
     }
  }

  //-------------
  delete tree;
  delete modelSet;
  delete rdist;

  return status;
}
Esempio n. 20
0
int main() {
  //Get some leaf names:
  vector<string> leaves(100);
  for (size_t i = 0; i < leaves.size(); ++i)
    leaves[i] = "leaf" + TextTools::toString(i);
  
  for (unsigned int j = 0; j < 1000; ++j) {
    //Generate a random tree, without branch lengths:
    TreeTemplate<Node>* tree = TreeTemplateTools::getRandomTree(leaves, true);
    TreeTemplate<Node>* tree2 = new TreeTemplate<Node>(*tree);
    if (!tree->hasSameTopologyAs(*tree2))
      return 1; //Error!!!
    tree2->getRootNode()->swap(0,1);
    //cout << "First test passed." << endl;
    if (!tree->hasSameTopologyAs(*tree2))
      return 1; //Error!!!
    //cout << "Second test passed." << endl;
  
    //Convert tree to string and read it again:
    string newick = TreeTemplateTools::treeToParenthesis(*tree);
    TreeTemplate<Node>* tree3 = TreeTemplateTools::parenthesisToTree(newick, true, TreeTools::BOOTSTRAP, false, false);
    if (!tree->hasSameTopologyAs(*tree3))
      return 1; //Error!!!
    //cout << "Third test passed." << endl;
    
    //-------------
    delete tree;
    delete tree2;
    delete tree3;
  }

  //Try to parse a string:
  TreeTemplate<Node>* tree4 = TreeTemplateTools::parenthesisToTree("((A:1,B:2):3,C:4);");
  cout << TreeTemplateTools::treeToParenthesis(*tree4) << endl;
  delete tree4;

  TreeTemplate<Node>* tree5 = TreeTemplateTools::parenthesisToTree("((A:1,B:2):3,C:4):5;");
  cout << TreeTemplateTools::treeToParenthesis(*tree5) << endl;
  delete tree5;

  Newick tReader;
  istringstream iss6("((A,B),C);");
  TreeTemplate<Node>* tree6 = tReader.read(iss6);
  cout << TreeTemplateTools::treeToParenthesis(*tree6) << endl;
  delete tree6;
  
  istringstream iss7("((A:1,B:2):3,C:4):5;");
  TreeTemplate<Node>* tree7 = tReader.read(iss7);
  cout << TreeTemplateTools::treeToParenthesis(*tree7) << endl;
  delete tree7;

  istringstream iss8("((A:1,B:2)80:3,C:4)2:5;");
  TreeTemplate<Node>* tree8 = tReader.read(iss8);
  cout << TreeTemplateTools::treeToParenthesis(*tree8) << endl;
  vector<int> ids = tree8->getNodesId();
  for (size_t i = 0; i < ids.size(); ++i) {
    cout << "Node " << ids[i] << ":" << endl;
    if (tree8->getNode(ids[i])->hasBranchProperty(TreeTools::BOOTSTRAP))
      cout << "N: BOOTSTRAP=" << dynamic_cast<Number<double>*>(tree8->getNode(ids[i])->getBranchProperty(TreeTools::BOOTSTRAP))->getValue() << endl;
    vector<string> branchPpt = tree8->getNode(ids[i])->getBranchPropertyNames();
  }
  delete tree8;

  istringstream iss9("((A,B)aa,C)2;");
  tReader.enableExtendedBootstrapProperty("ESS");
  TreeTemplate<Node>* tree9 = tReader.read(iss9);
  cout << TreeTemplateTools::treeToParenthesis(*tree9) << endl;
  ids = tree9->getNodesId();
  for (size_t i = 0; i < ids.size(); ++i) {
    cout << "Node " << ids[i] << ":" << endl;
    vector<string> nodePpt = tree9->getNode(ids[i])->getNodePropertyNames();
    for (size_t j = 0; j < nodePpt.size(); ++j)
      if (tree9->getNode(ids[i])->hasNodeProperty(nodePpt[j]))
        cout << "N: " << nodePpt[j] << "=" << dynamic_cast<BppString*>(tree9->getNode(ids[i])->getNodeProperty(nodePpt[j]))->toSTL() << endl;
    vector<string> branchPpt = tree9->getNode(ids[i])->getBranchPropertyNames();
    for (size_t j = 0; j < branchPpt.size(); ++j)
      if (tree9->getNode(ids[i])->hasBranchProperty(branchPpt[j]))
        cout << "B: " << branchPpt[j] << "=" << dynamic_cast<BppString*>(tree9->getNode(ids[i])->getBranchProperty(branchPpt[j]))->toSTL() << endl;
  }
  delete tree9;

  //Test file parsing:
  TreeTemplate<Node>* tree10 = TreeTemplateTools::getRandomTree(leaves, true);
  Newick tWriter;
  tWriter.write(*tree10, "tmp_tree.dnd");
  Tree* test = tReader.read("tmp_tree.dnd");
  if (!TreeTools::haveSameTopology(*tree10, *test))
    return 1;
  cout << "Newick I/O ok." << endl;

  //Multiple trees:
  vector<Tree *> trees;
  for (unsigned int i = 0; i < 100; ++i) {
    trees.push_back(TreeTemplateTools::getRandomTree(leaves, true));
  }
  tWriter.write(trees, "tmp_trees.dnd");

  vector<Tree *> trees2;
  tReader.read("tmp_trees.dnd", trees2);

  for (unsigned int i = 0; i < 100; ++i) {
    if (!TreeTools::haveSameTopology(*trees[i], *trees2[i]))
    {
      cerr << "Tree " << i << " failed to write and/or read!" << endl;
      return 1;
    }
  }
  cout << "Newick multiple I/O ok." << endl;

  for (unsigned int i = 0; i < 100; ++i) {
    delete trees[i];
    delete trees2[i];
  }

  //Try newick read on non-file:
  cout << "Testing parsing a directory..." << endl;
  try {
    Tree* tmp = tReader.read("test/");
    cerr << "Arg, reading on directory should fail!" << endl;
    if (tmp) {
      cerr << "Output of read on directory is not NULL!" << endl;
    }
    return 1;
  } catch (Exception& ex) {
    cout << "Ok, reading on directory throws exception!" << endl;
  }

  cout << "Testing parsing a directory for multiple trees..." << endl;
  try {
    vector<Tree*> treesTmp;
    tReader.read("test/", treesTmp);
    if (treesTmp.size() != 0) {
      cerr << "Output of multiple read on directory is not 0!" << endl;
      return 1;
    } else {
      cout << "Ok, reading on directory returns a vector of size 0!" << endl;
    }
  } catch(Exception& ex) {
    cout << "Error, no exception should be thrown here!" << endl;
  }

  //Now try some weird cases, to see if we handle them properly:
  //single node tree:
  cout << "Testing a tree with a node of degree 2:" << endl;
  TreeTemplate<Node>* weird1 = TreeTemplateTools::parenthesisToTree("((A:1):2.0,B);");
  if (weird1->getNodes().size() != 4) {
    cout << "Error, tree has " << weird1->getNodes().size() << " node(s) instead of 4!" << endl;
    VectorTools::print(weird1->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird1) << endl;
  delete weird1;

  cout << "Testing a tree with a node of degree 2, without branch length:" << endl;
  TreeTemplate<Node>* weird2 = TreeTemplateTools::parenthesisToTree("((A),B);");
  if (weird2->getNodes().size() != 4) {
    cout << "Error, tree has " << weird2->getNodes().size() << " node(s) instead of 4!" << endl;
    VectorTools::print(weird2->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird2) << endl;
  delete weird2;

  cout << "Testing a tree with several single nodes:" << endl;
  TreeTemplate<Node>* weird3 = TreeTemplateTools::parenthesisToTree("((((((A)):1)):3),B);");
  if (weird3->getNodes().size() != 8) {
    cout << "Error, tree has " << weird3->getNodes().size() << " node(s) instead of 8!" << endl;
    VectorTools::print(weird3->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird3) << endl;
  delete weird3;

  cout << "Testing a tree with a single leaf:" << endl;
  TreeTemplate<Node>* weird4 = TreeTemplateTools::parenthesisToTree("(A:1.0);");
  if (weird4->getNodes().size() != 2) {
    cout << "Error, tree has " << weird4->getNodes().size() << " node(s) instead of 2!" << endl;
    VectorTools::print(weird4->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird4) << endl;
  delete weird4;

  cout << "Testing a tree with a single node:" << endl;
  TreeTemplate<Node>* weird5 = TreeTemplateTools::parenthesisToTree("((A:1.0));");
  if (weird5->getNodes().size() != 3) {
    cout << "Error, tree has " << weird5->getNodes().size() << " node(s) instead of 3!" << endl;
    VectorTools::print(weird5->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird5) << endl;
  delete weird5;

  cout << "Testing a tree with a single node and branch lengths:" << endl;
  TreeTemplate<Node>* weird6 = TreeTemplateTools::parenthesisToTree("((A:1.0):2.0);");
  if (weird6->getNodes().size() != 3) {
    cout << "Error, tree has " << weird6->getNodes().size() << " node(s) instead of 3!" << endl;
    VectorTools::print(weird6->getLeavesNames());
    return 1;
  }
  cout << TreeTemplateTools::treeToParenthesis(*weird6) << endl;
  delete weird6;
 
  return 0;
}
Esempio n. 21
0
int main() {
  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.01, B:0.02):0.03,C:0.01,D:0.1);");
  vector<string> seqNames= tree->getLeavesNames();
  vector<int> ids = tree->getNodesId();
  //-------------

  NucleicAlphabet* alphabet = new DNA();
  SubstitutionModel* model = new T92(alphabet, 3.);
  FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet);
  std::vector<std::string> globalParameterNames;
  globalParameterNames.push_back("T92.kappa");
  map<string, string> alias;

  SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames);
  DiscreteDistribution* rdist = new ConstantRateDistribution();
  vector<double> thetas;
  for (unsigned int i = 0; i < modelSet->getNumberOfModels(); ++i) {
    double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005;
    cout << "Theta" << i << " set to " << theta << endl; 
    modelSet->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    thetas.push_back(theta);
  }
  NonHomogeneousSequenceSimulator simulator(modelSet, rdist, tree);

  unsigned int n = 100000;
  OutputStream* profiler  = new StlOutputStream(new ofstream("profile.txt", ios::out));
  OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out));

  //Check fast simulation first:
 
  cout << "Fast check:" << endl;
 
  //Generate data set:
  VectorSiteContainer sites(seqNames, alphabet);
  for (unsigned int i = 0; i < n; ++i) {
    auto_ptr<Site> site(simulator.simulateSite());
    site->setPosition(static_cast<int>(i));
    sites.addSite(*site, false);
  }

  //Now fit model:
  SubstitutionModelSet* modelSet2 = modelSet->clone();
  RNonHomogeneousTreeLikelihood tl(*tree, sites, modelSet2, rdist);
  tl.initialize();

  OptimizationTools::optimizeNumericalParameters2(
      &tl, tl.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

  //Now compare estimated values to real ones:
  for (size_t i = 0; i < thetas.size(); ++i) {
    cout << thetas[i] << "\t" << modelSet2->getModel(i)->getParameter("theta").getValue() << endl;
    double diff = abs(thetas[i] - modelSet2->getModel(i)->getParameter("theta").getValue());
    if (diff > 0.1)
      return 1;
  }
  delete modelSet2;

  //Now try detailed simulations:

  cout << "Detailed check:" << endl;
  
  //Generate data set:
  VectorSiteContainer sites2(seqNames, alphabet);
  for (unsigned int i = 0; i < n; ++i) {
    RASiteSimulationResult* result = simulator.dSimulateSite();
    auto_ptr<Site> site(result->getSite(*simulator.getSubstitutionModelSet()->getModel(0)));
    site->setPosition(static_cast<int>(i));
    sites2.addSite(*site, false);
    delete result;
  }

  //Now fit model:
  SubstitutionModelSet* modelSet3 = modelSet->clone();
  RNonHomogeneousTreeLikelihood tl2(*tree, sites2, modelSet3, rdist);
  tl2.initialize();

  OptimizationTools::optimizeNumericalParameters2(
      &tl2, tl2.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

  //Now compare estimated values to real ones:
  for (size_t i = 0; i < thetas.size(); ++i) {
    cout << thetas[i] << "\t" << modelSet3->getModel(i)->getParameter("theta").getValue() << endl;
    double diff = abs(thetas[i] - modelSet3->getModel(i)->getParameter("theta").getValue());
    if (diff > 0.1)
      return 1;
  }
  delete modelSet3;

  //-------------
  delete tree;
  delete alphabet;
  delete modelSet;
  delete rdist;

  return 0;
}
Esempio n. 22
0
/**
 * @brief Collect subtrees of a tree for a given size, starting at its root.
 *
 * Convenience entry point: it forwards the root node of @p tree, the
 * requested @p size and an empty result vector to the three-argument
 * overload of getSubtreesWithSize(), which does the actual selection.
 *
 * @param tree The tree to search.
 * @param size The size passed on to the node-based overload.
 * @return The nodes gathered by the node-based overload.
 */
vector<const Node *> ClusterTools::getSubtreesWithSize(const TreeTemplate<Node>& tree, size_t size)
{
  vector<const Node *> matches;
  const Node* root = tree.getRootNode();
  getSubtreesWithSize(root, size, matches);
  return matches;
}