/**
 * Returns the likelihood tree in parenthesis format, with a support value
 * attached to every branch leading to a non-root node that has children.
 *
 * For each node that has a grandfather, the node's id is recorded under its
 * father's id: the first such child goes to rearr1, any later one overwrites
 * rearr2. Both ids are then passed to likelihood->testNNI(), and the value
 * 1 / (1 + exp(-t1) + exp(-t2)) is stored on the father's branch under
 * TreeTools::BOOTSTRAP, where t1 and t2 are the values returned by testNNI().
 *
 * @return The annotated tree as a parenthesis string without trailing
 *         whitespace.
 */
string Alignment::get_abayes_tree() {
  TreeTemplate<Node> annotated = TreeTemplate<Node>(likelihood->getTree());

  // Collect, per father id, the ids of its children that can be rearranged.
  std::map<int, nniIDs> rearrangements;
  for (auto& current : annotated.getNodes()) {
    if (!current->hasFather() || !current->getFather()->hasFather()) {
      continue; // Only nodes that have a grandfather are recorded.
    }
    auto known = rearrangements.find(current->getFatherId());
    if (known != rearrangements.end()) {
      known->second.rearr2 = current->getId();
    } else {
      rearrangements[current->getFatherId()].rearr1 = current->getId();
    }
  }

  // Turn the two testNNI() results of each entry into a single support value.
  for (const auto& candidate : rearrangements) {
    const double negTest1 = -likelihood->testNNI(candidate.second.rearr1);
    const double negTest2 = -likelihood->testNNI(candidate.second.rearr2);
    bpp::Number<double> abayes = 1 / (1 + exp(negTest1) + exp(negTest2));
    annotated.setBranchProperty(candidate.first, TreeTools::BOOTSTRAP, abayes);
  }

  string newick = TreeTools::treeToParenthesis(annotated, true, TreeTools::BOOTSTRAP);
  // Drop the trailing whitespace (if nothing else is left, the string becomes empty).
  newick.erase(newick.find_last_not_of(" \n\r\t") + 1);
  return newick;
}
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.001, B:0.002):0.003,C:0.01,D:0.1);"); cout << tree->getNumberOfLeaves() << endl; vector<int> ids = tree->getNodesId(); //------------- NucleicAlphabet* alphabet = new DNA(); SubstitutionModel* model = new GTR(alphabet, 1, 0.2, 0.3, 0.4, 0.4, 0.1, 0.35, 0.35, 0.2); //DiscreteDistribution* rdist = new GammaDiscreteDistribution(4, 0.4, 0.4); DiscreteDistribution* rdist = new ConstantDistribution(1.0); HomogeneousSequenceSimulator simulator(model, rdist, tree); unsigned int n = 100000; map<int, RowMatrix<unsigned int> > counts; for (size_t j = 0; j < ids.size() - 1; ++j) //ignore root, the last id counts[ids[j]].resize(4, 4); for (unsigned int i = 0; i < n; ++i) { RASiteSimulationResult* result = simulator.dSimulate(); for (size_t j = 0; j < ids.size() - 1; ++j) { //ignore root, the last id result->getMutationPath(ids[j]).getEventCounts(counts[ids[j]]); } delete result; } map<int, RowMatrix<double> >freqs; map<int, double> sums; for (size_t k = 0; k < ids.size() - 1; ++k) { //ignore root, the last id RowMatrix<double>* freqsP = &freqs[ids[k]]; RowMatrix<unsigned int>* countsP = &counts[ids[k]]; freqsP->resize(4, 4); for (unsigned int i = 0; i < 4; ++i) for (unsigned int j = 0; j < 4; ++j) (*freqsP)(i, j) = static_cast<double>((*countsP)(i, j)) / (static_cast<double>(n)); //For now we simply compare the total number of substitutions: sums[ids[k]] = MatrixTools::sumElements(*freqsP); cout << "Br" << ids[k] << " BrLen = " << tree->getDistanceToFather(ids[k]) << " counts = " << sums[ids[k]] << endl; MatrixTools::print(*freqsP); } //We should compare this matrix with the expected one! 
for (size_t k = 0; k < ids.size() - 1; ++k) { //ignore root, the last id if (abs(sums[ids[k]] - tree->getDistanceToFather(ids[k])) > 0.01) { delete tree; delete alphabet; delete model; delete rdist; return 1; } } //------------- delete tree; delete alphabet; delete model; delete rdist; //return (abs(obs - 0.001) < 0.001 ? 0 : 1); return 0; }
/**
 * Writes a tree as a parenthesis string.
 *
 * The sons of the root are written through nodeToParenthesis(), separated by
 * commas and enclosed in parentheses. If the root has no son, its name is
 * written instead. The root's distance to father, when it has one, is
 * appended after a colon. The string ends with ';' and a line break.
 *
 * @param tree    The tree to write.
 * @param writeId Forwarded unchanged to nodeToParenthesis().
 * @return The parenthesis description of the tree.
 */
string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool writeId)
{
  const Node* root = tree.getRootNode();
  ostringstream out;
  out << "(";

  // Write the first item, then every remaining son prefixed with a comma.
  size_t nextSon = 0;
  if (root->hasNoSon())
  {
    out << root->getName();
  }
  else
  {
    out << nodeToParenthesis(*root->getSon(0), writeId);
    nextSon = 1;
  }
  for (size_t i = nextSon; i < root->getNumberOfSons(); ++i)
  {
    out << "," << nodeToParenthesis(*root->getSon(i), writeId);
  }

  out << ")";
  if (root->hasDistanceToFather())
  {
    out << ":" << root->getDistanceToFather();
  }
  out << ";" << endl;
  return out.str();
}
/**
 * Computes the radius of a tree.
 *
 * The tree is first re-rooted with midRoot() using the sum-of-squares
 * criterion (without forcing the root onto a branch), so the tree passed in
 * is modified. The radius is the 'sum' moment of the new root divided by its
 * number of leaves.
 *
 * @param tree The tree to measure; it is re-rooted by this call.
 * @return The radius of the tree.
 */
double TreeTemplateTools::getRadius(TreeTemplate<Node>& tree)
{
  TreeTemplateTools::midRoot(tree, MIDROOT_SUM_OF_SQUARES, false);
  const Moments_ rootMoments = getSubtreeMoments_(tree.getRootNode());
  return rootMoments.sum / rootMoments.numberOfLeaves;
}
/**
 * Builds a tree from a parenthesis description.
 *
 * Everything up to the last ';' of the description is handed to
 * parenthesisToNode(); the node it returns becomes the root of a newly
 * allocated tree.
 *
 * @param description  The parenthesis string; it must contain a ';'.
 * @param bootstrap    Forwarded unchanged to parenthesisToNode().
 * @param propertyName Forwarded unchanged to parenthesisToNode().
 * @param withId       Forwarded to parenthesisToNode(); when false, the node
 *                     ids of the resulting tree are reset.
 * @param verbose      Forwarded to parenthesisToNode(); when true, a
 *                     " nodes loaded." message is also written to
 *                     ApplicationTools::message.
 * @return A new tree, to be deleted by the caller.
 * @throw Exception If the description contains no semi-colon.
 */
TreeTemplate<Node>* TreeTemplateTools::parenthesisToTree(const string& description, bool bootstrap, const string& propertyName, bool withId, bool verbose) throw (Exception)
{
  const string::size_type lastSemiColon = description.rfind(';');
  if (lastSemiColon == string::npos)
  {
    throw Exception("TreeTemplateTools::parenthesisToTree(). Bad format: no semi-colon found.");
  }

  const string body = description.substr(0, lastSemiColon);
  unsigned int nodeCounter = 0;
  Node* root = parenthesisToNode(body, nodeCounter, bootstrap, propertyName, withId, verbose);

  TreeTemplate<Node>* result = new TreeTemplate<Node>();
  result->setRootNode(root);
  if (!withId)
  {
    result->resetNodesId();
  }
  if (verbose)
  {
    (*ApplicationTools::message) << " nodes loaded.";
    ApplicationTools::message->endLine();
  }
  return result;
}
/**
 * Builds the likelihood tree of a substitution process.
 *
 * The topology of the process tree is cloned into a tree of
 * RecursiveLikelihoodNode, and one independent copy of that tree is stored in
 * vTree_ for each of the nbClasses_ classes.
 *
 * @param process     The substitution process providing the tree.
 * @param usepatterns Stored in usePatterns_.
 */
RecursiveLikelihoodTree::RecursiveLikelihoodTree(const SubstitutionProcess& process, bool usepatterns) :
  AbstractLikelihoodTree(process),
  vTree_(),
  patternLinks_(),
  usePatterns_(usepatterns),
  initializedAboveLikelihoods_(false)
{
  TreeTemplate<Node> tree = process.getParametrizableTree().getTree();

  RecursiveLikelihoodNode* rCN = TreeTemplateTools::cloneSubtree<RecursiveLikelihoodNode>(*tree.getRootNode());

  // The prototype is an automatic object, so it is released when the
  // constructor exits, including when clone() or push_back() throws.
  TreeTemplate<RecursiveLikelihoodNode> prototype(rCN);

  for (size_t i = 0; i < nbClasses_; ++i)
  {
    vTree_.push_back(prototype.clone());
  }
}
/**
 * Builds a runtime tree from a template.
 *
 * One Node is allocated per template node. Transform, name and batch are
 * copied over, and the template's first-child / next-sibling links are
 * converted into a per-node child array.
 *
 * @param tmpl The template describing the nodes and their hierarchy.
 */
Tree::Tree( const TreeTemplate& tmpl ) :
mPosition( 0.0, 0.0, 0.0 ),
mAngle( 0.0, 0.0, 0.0 ),
mScale( 1.0, 1.0, 1.0 ),
mNodes( 0 ),
mNodeNumber( 0 ),
mTime( 0.0 ){
	mNodeNumber = tmpl.nodeNumber();
	mNodes = new Node[ mNodeNumber ];
	for ( int i = 0; i < mNodeNumber; ++i ){
		Node& dst = mNodes[ i ];
		const NodeTemplate* src = tmpl.node( i );
		// Copy the parameters over.
		dst.setTranslation( *src->translation() );
		dst.setRotation( *src->rotation() );
		dst.setScale( *src->scale() );
		dst.setName( src->name()->c_str() );
		dst.setBatch( src->batch() );

		// [Conversion from first-child / next-sibling form to child-array form]
		// Count the children by walking the sibling chain; a negative index ends it.
		int childNumber = 0;
		for ( int c = src->child(); c >= 0; c = tmpl.node( c )->brother() ){
			++childNumber;
		}
		// Allocate the child slots.
		dst.setChildNumber( childNumber );
		// Fill the child slots, in sibling order.
		int slot = 0;
		for ( int c = src->child(); c >= 0; c = tmpl.node( c )->brother() ){
			dst.setChild( slot, mNodes + c );
			++slot;
		}
	}
}
/**
 * Builds a random topology over a set of leaf names.
 *
 * A pool is initialised with one leaf node per name. Two nodes are then
 * repeatedly drawn at random, removed from the pool and replaced by a new
 * parent node, until 2 (rooted) or 3 (unrooted) nodes remain. These become
 * the sons of the root.
 *
 * @param leavesNames The names of the leaves.
 * @param rooted      Whether the root should end up with 2 sons instead of 3.
 * @return A new tree with reset node ids, or 0 if no name was given.
 */
TreeTemplate<Node>* TreeTemplateTools::getRandomTree(vector<string>& leavesNames, bool rooted)
{
  if (leavesNames.empty())
    return 0; // No taxa.

  // The pool contains all nodes still waiting for a father.
  // It starts with all leaves, which are then grouped randomly 2 by 2.
  // At the end, it contains only the sons of the root.
  vector<Node*> pool;
  pool.reserve(leavesNames.size());
  for (size_t i = 0; i < leavesNames.size(); ++i)
  {
    pool.push_back(new Node(leavesNames[i]));
  }

  // Group nodes until only the sons of the root are left:
  const size_t nbRootSons = rooted ? 2 : 3;
  while (pool.size() > nbRootSons)
  {
    // Draw two nodes one after the other; the second is drawn from the shrunk pool.
    Node* picked[2];
    for (int k = 0; k < 2; ++k)
    {
      const size_t pos = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(pool.size());
      picked[k] = pool[pos];
      pool.erase(pool.begin() + static_cast<ptrdiff_t>(pos));
    }

    // Replace them by their new father:
    Node* parent = new Node();
    parent->addSon(picked[0]);
    parent->addSon(picked[1]);
    pool.push_back(parent);
  }

  // Attach whatever is left to the root and wrap it into a tree:
  Node* root = new Node();
  for (size_t i = 0; i < pool.size(); ++i)
  {
    root->addSon(pool[i]);
  }
  TreeTemplate<Node>* result = new TreeTemplate<Node>(root);
  result->resetNodesId();
  return result;
}
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.01, B:0.01):0.02,C:0.03):0.01,D:0.04);"); vector<string> seqNames = tree->getLeavesNames(); vector<int> ids = tree->getNodesId(); //------------- const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET; SubstitutionModel* model = new T92(alphabet, 3.); DiscreteDistribution* rdist = new GammaDiscreteDistribution(4, 1.0); rdist->aliasParameters("alpha", "beta"); VectorSiteContainer sites(alphabet); sites.addSequence(BasicSequence("A", "AAATGGCTGTGCACGTC", alphabet)); sites.addSequence(BasicSequence("B", "AACTGGATCTGCATGTC", alphabet)); sites.addSequence(BasicSequence("C", "ATCTGGACGTGCACGTG", alphabet)); sites.addSequence(BasicSequence("D", "CAACGGGAGTGCGCCTA", alphabet)); try { fitModelH(model, rdist, *tree, sites, 93.017264552603336369, 71.265543199977557265); } catch (Exception& ex) { cerr << ex.what() << endl; return 1; } try { fitModelHClock(model, rdist, *tree, sites, 92.27912072473920090943, 71.26554020984087856050); } catch (Exception& ex) { cerr << ex.what() << endl; return 1; } //------------- delete tree; delete model; delete rdist; return 0; }
/**
 * Writes a tree as a parenthesis string, with branch annotations.
 *
 * The sons of the root are written through nodeToParenthesis(), separated by
 * commas and enclosed in parentheses. If the root has no son, its name is
 * written instead. A property of the root branch is then appended:
 * - if bootstrap is true, the TreeTools::BOOTSTRAP property, when present,
 *   which must be a Number<double>;
 * - otherwise the property called propertyName, when present, which must be
 *   a BppString.
 * The string ends with ';' and a line break.
 *
 * @param tree         The tree to write.
 * @param bootstrap    Whether to write bootstrap values rather than the named
 *                     property.
 * @param propertyName The branch property to write when bootstrap is false.
 * @return The parenthesis description of the tree.
 * @throw Exception If the property to write does not have the expected type.
 */
string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool bootstrap, const string& propertyName)
{
  const Node* node = tree.getRootNode();
  ostringstream s;
  s << "(";

  // Write the first item, then every remaining son prefixed with a comma.
  size_t nextSon = 0;
  if (node->hasNoSon())
  {
    s << node->getName();
  }
  else
  {
    s << nodeToParenthesis(*node->getSon(0), bootstrap, propertyName);
    nextSon = 1;
  }
  for (size_t i = nextSon; i < node->getNumberOfSons(); i++)
  {
    s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName);
  }
  s << ")";

  if (bootstrap)
  {
    if (node->hasBranchProperty(TreeTools::BOOTSTRAP))
    {
      // The cast yields a null pointer when the property has another type:
      // report it instead of dereferencing it.
      const Number<double>* support = dynamic_cast<const Number<double>*>(node->getBranchProperty(TreeTools::BOOTSTRAP));
      if (!support)
        throw Exception("TreeTemplateTools::treeToParenthesis. Bootstrap property should be a Number<double>.");
      s << support->getValue();
    }
  }
  else
  {
    if (node->hasBranchProperty(propertyName))
    {
      const BppString* ppt = dynamic_cast<const BppString*>(node->getBranchProperty(propertyName));
      if (ppt)
        s << *ppt;
      else
        throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
    }
  }
  s << ";" << endl;
  return s.str();
}
void NexusIOTree::read(std::istream& in, std::vector<Tree*>& trees) const throw (Exception) { // Checking the existence of specified file if (! in) { throw IOException ("NexusIOTree::read(). Failed to read from stream"); } //Look for the TREES block: string line = ""; while (TextTools::toUpper(line) != "BEGIN TREES;") { if (in.eof()) throw Exception("NexusIOTree::read(). No trees block was found."); line = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in)); } string cmdName = "", cmdArgs = ""; bool cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false); if (! cmdFound) throw Exception("NexusIOTree::read(). Missing tree command."); cmdName = TextTools::toUpper(cmdName); //Look for the TRANSLATE command: map<string, string> translation; bool hasTranslation = false; if (cmdName == "TRANSLATE") { //Parse translation: StringTokenizer st(cmdArgs, ","); while (st.hasMoreToken()) { string tok = TextTools::removeSurroundingWhiteSpaces(st.nextToken()); NestedStringTokenizer nst(tok, "'", "'", " \t"); if (nst.numberOfRemainingTokens() != 2) throw Exception("NexusIOTree::read(). Unvalid translation description."); string name = nst.nextToken(); string tln = nst.nextToken(); translation[name] = tln; } hasTranslation = true; cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false); if (! cmdFound) throw Exception("NexusIOTree::read(). Missing tree command."); else cmdName = TextTools::toUpper(cmdName); } //Now parse the trees: while (cmdFound && cmdName != "END") { if (cmdName != "TREE") throw Exception("NexusIOTree::read(). Unvalid command found: " + cmdName); string::size_type pos = cmdArgs.find("="); if (pos == string::npos) throw Exception("NexusIOTree::read(). 
unvalid format, should be tree-name=tree-description"); string description = cmdArgs.substr(pos + 1); TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree(description + ";", true); //Now translate leaf names if there is a translation: //(we assume that all trees share the same translation! ===> check!) if (hasTranslation) { vector<Node*> leaves = tree->getLeaves(); for (size_t i = 0; i < leaves.size(); i++) { string name = leaves[i]->getName(); if (translation.find(name) == translation.end()) { throw Exception("NexusIOTree::read(). No translation was given for this leaf: " + name); } leaves[i]->setName(translation[name]); } } trees.push_back(tree); cmdFound = NexusTools::getNextCommand(in, cmdName, cmdArgs, false); if (cmdFound) cmdName = TextTools::toUpper(cmdName); } }
/**
 * Re-roots a tree at the position that minimises the given criterion.
 *
 * The tree is unrooted if needed, every branch is evaluated by
 * getBestRootInSubtree_(), and the tree is re-rooted either on an existing
 * node (when the best position is within 1e-6 of one end of the branch) or
 * on a new node inserted on the best branch. If no branch is selected by the
 * search, the current root is kept.
 *
 * @param tree            The tree to re-root (modified in place).
 * @param criterion       MIDROOT_VARIANCE or MIDROOT_SUM_OF_SQUARES.
 * @param forceBranchRoot If true and the resulting root has more than two
 *                        sons, a new root is inserted in the middle of the
 *                        shortest branch leaving the root.
 * @throw Exception If the criterion is not one of the two supported values.
 */
void TreeTemplateTools::midRoot(TreeTemplate<Node>& tree, short criterion, bool forceBranchRoot)
{
  if (criterion != MIDROOT_VARIANCE && criterion != MIDROOT_SUM_OF_SQUARES)
    throw Exception("TreeTemplateTools::midRoot(). Illegal criterion value '" + TextTools::toString(criterion) + "'");

  if (tree.isRooted())
    tree.unroot();
  Node* ref_root = tree.getRootNode();
  //
  // The best_root_branch object records:
  // -- the current best branch: .first
  // -- the current best value of the criterion: .second["score"]
  // -- the best position of the root on the branch: .second["position"]
  //      0 is toward the original root, 1 is away from it
  //
  pair<Node*, map<string, double> > best_root_branch;
  best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father
  best_root_branch.second["position"] = -1;
  best_root_branch.second["score"] = numeric_limits<double>::max();

  // find the best root
  getBestRootInSubtree_(tree, criterion, ref_root, best_root_branch);
  tree.rootAt(ref_root); // back to the original root

  // reroot
  const double pos = best_root_branch.second["position"];
  if (best_root_branch.first == ref_root)
  {
    // No branch improved on the initial score, so .first is still the root
    // itself and "position" is still -1. The root has no father to root at:
    // keep the current root.
  }
  else if (pos < 1e-6 || pos > 1 - 1e-6)
  {
    // The best root position is on a node (this is often the case with the sum of squares criterion)
    tree.rootAt(pos < 1e-6 ? best_root_branch.first->getFather() : best_root_branch.first);
  }
  else
  {
    // The best root position is somewhere on a branch (a new Node is created)
    Node* new_root = new Node();
    new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );

    double root_branch_length = best_root_branch.first->getDistanceToFather();
    Node* best_root_father = best_root_branch.first->getFather();

    best_root_father->removeSon(best_root_branch.first);
    best_root_father->addSon(new_root);
    new_root->addSon(best_root_branch.first);

    // Both halves of the split branch are kept at least 1e-6 long.
    new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6));
    best_root_branch.first->setDistanceToFather(max((1 - pos) * root_branch_length, 1e-6));

    // The two branches leaving the root must have the same branch properties
    const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames();
    for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
    {
      new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p));
    }

    tree.rootAt(new_root);
  }

  if (forceBranchRoot) // if we want the root to be on a branch, not on a node
  {
    Node* orig_root = tree.getRootNode();
    vector<Node*> root_sons = orig_root->getSons();
    if (root_sons.size() > 2)
    {
      // Pick the son with the shortest branch to the root.
      Node* nearest = root_sons.at(0);
      for (vector<Node*>::iterator n = root_sons.begin(); n != root_sons.end(); ++n)
      {
        if ((**n).getDistanceToFather() < nearest->getDistanceToFather())
          nearest = *n;
      }
      const double d = nearest->getDistanceToFather();

      // Insert the new root in the middle of that branch.
      Node* new_root = new Node();
      new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );
      orig_root->removeSon(nearest);
      orig_root->addSon(new_root);
      new_root->addSon(nearest);
      new_root->setDistanceToFather(d / 2.);
      nearest->setDistanceToFather(d / 2.);

      // Copy the branch properties so both halves carry the same ones.
      const vector<string> branch_properties = nearest->getBranchPropertyNames();
      for (vector<string>::const_iterator p = branch_properties.begin(); p != branch_properties.end(); ++p)
      {
        new_root->setBranchProperty(*p, *nearest->getBranchProperty(*p));
      }

      tree.rootAt(new_root);
    }
  }
}
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.1, B:0.2):0.3,C:0.1):0.2,(D:0.3,(E:0.2,F:0.05):0.1):0.1);"); vector<string> seqNames= tree->getLeavesNames(); vector<int> ids = tree->getNodesId(); //------------- const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET; FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet); SubstitutionModel* model = new T92(alphabet, 3.); std::vector<std::string> globalParameterNames; globalParameterNames.push_back("T92.kappa"); //Very difficult to optimize on small datasets: DiscreteDistribution* rdist = new GammaDiscreteRateDistribution(4, 1.0); ParametrizableTree* parTree = new ParametrizableTree(*tree); FrequenciesSet* rootFreqs2 = rootFreqs->clone(); DiscreteDistribution* rdist2 = rdist->clone(); SubstitutionModel* model2=model->clone(); map<string, string> alias; SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames); unique_ptr<SubstitutionModelSet> modelSetSim(modelSet->clone()); NonHomogeneousSubstitutionProcess* subPro= NonHomogeneousSubstitutionProcess::createNonHomogeneousSubstitutionProcess(model2, rdist2, rootFreqs2, parTree, globalParameterNames); // Simulation size_t nsites = 1000; unsigned int nrep = 20; size_t nmodels = modelSet->getNumberOfModels(); vector<double> thetas(nmodels); vector<double> thetasEst1(nmodels); vector<double> thetasEst2(nmodels); vector<double> thetasEst1n(nmodels); vector<double> thetasEst2n(nmodels); for (size_t i = 0; i < nmodels; ++i) { double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005; cout << "Theta" << i << " set to " << theta << endl; modelSetSim->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta); //subPro->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta); thetas[i] = theta; } NonHomogeneousSequenceSimulator simulator(modelSetSim.get(), rdist, tree); NonHomogeneousSubstitutionProcess* 
subPro2 = subPro->clone(); for (unsigned int j = 0; j < nrep; j++) { OutputStream* profiler = new StlOutputStream(new ofstream("profile.txt", ios::out)); OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out)); //Simulate data: unique_ptr<SiteContainer> sites(simulator.simulate(nsites)); //Now fit model: unique_ptr<SubstitutionModelSet> modelSet2(modelSet->clone()); RNonHomogeneousTreeLikelihood tl(*tree, *sites.get(), modelSet, rdist, true, true, false); tl.initialize(); RNonHomogeneousTreeLikelihood tl2(*tree, *sites.get(), modelSet2.get(), rdist, true, true, true); tl2.initialize(); SubstitutionProcess* nsubPro=subPro->clone(); SubstitutionProcess* nsubPro2=subPro2->clone(); RecursiveLikelihoodTreeCalculation* tlComp = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro, true, false); SingleProcessPhyloLikelihood ntl(nsubPro, tlComp, true); RecursiveLikelihoodTreeCalculation* tlComp2 = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro2, true); SingleProcessPhyloLikelihood ntl2(nsubPro2, tlComp2, true); for (size_t i = 0; i < nmodels; ++i) { ntl.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]); ntl2.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]); } cout << setprecision(10) << "OldTL init: " << tl.getValue() << "\t" << tl2.getValue() << endl; cout << setprecision(10) << "NewTL init: " << ntl.getValue() << "\t" << ntl2.getValue() << endl; unsigned int c1 = OptimizationTools::optimizeNumericalParameters2( &tl, tl.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); unsigned int c2 = OptimizationTools::optimizeNumericalParameters2( &tl2, tl2.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); unsigned int nc1 = OptimizationTools::optimizeNumericalParameters2( &ntl, ntl.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, 
OptimizationTools::OPTIMIZATION_NEWTON); unsigned int nc2 = OptimizationTools::optimizeNumericalParameters2( &ntl2, ntl2.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); cout << "OldTL: " << c1 << ": " << tl.getValue() << "\t" << c2 << ": " << tl2.getValue() << endl; cout << "NewTL: " << nc1 << ": " << ntl.getValue() << "\t" << nc2 << ": " << ntl2.getValue() << endl; cout << "Thetas : " << endl; for (size_t i = 0; i < nmodels; ++i) { // cerr << modelSet->getModel(i)->getParameter("theta").getValue() << "\t" << modelSet2->getModel(i)->getParameter("theta").getValue(); // cerr << "\t" << subPro->getModel(i)->getParameter("theta").getValue() << "\t" << subPro2->getModel(i)->getParameter("theta").getValue() << endl; // if (abs(modelSet2->getModel(i)->getParameter("theta").getValue() - modelSet3->getModel(i)->getParameter("theta").getValue()) > 0.1) // return 1; thetasEst1[i] += modelSet->getModel(i)->getParameter("theta").getValue(); thetasEst2[i] += modelSet2->getModel(i)->getParameter("theta").getValue(); thetasEst1n[i] += dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro)->getModel(i)->getParameter("theta").getValue(); thetasEst2n[i] += dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro2)->getModel(i)->getParameter("theta").getValue(); } } thetasEst1 /= static_cast<double>(nrep); thetasEst2 /= static_cast<double>(nrep); thetasEst1n /= static_cast<double>(nrep); thetasEst2n /= static_cast<double>(nrep); //Now compare estimated values to real ones: cout << "Real" << "\t" << "Est_Old1" << "\t" << "Est_Old2" << "\t"; cout << "Est_New1" << "\t" << "Est_New2" << endl; for (size_t i = 0; i < thetas.size(); ++i) { cout << thetas[i] << "\t" << thetasEst1[i] << "\t" << thetasEst2[i] << "\t"; cout << thetasEst1n[i] << "\t" << thetasEst2n[i] << endl; double diff1 = abs(thetas[i] - thetasEst1[i]); double diff2 = abs(thetas[i] - thetasEst2[i]); double diffn1 = abs(thetas[i] - thetasEst1n[i]); double 
diffn2 = abs(thetas[i] - thetasEst2n[i]); if (diff1 > 0.2 || diff2 > 0.2 || diffn1 > 0.2 || diffn2 > 0.2) return 1; } return 0; }
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.001, B:0.002):0.008,C:0.01,D:0.1);"); vector<int> ids = tree->getNodesId(); ids.pop_back(); //Ignore root //------------- CodonAlphabet* alphabet = new CodonAlphabet(&AlphabetTools::DNA_ALPHABET); GeneticCode* gc = new StandardGeneticCode(&AlphabetTools::DNA_ALPHABET); CodonSubstitutionModel* model = new YN98(gc, CodonFrequenciesSet::getFrequenciesSetForCodons(CodonFrequenciesSet::F0, gc)); //SubstitutionModel* model = new CodonRateSubstitutionModel( // gc, // new JCnuc(dynamic_cast<CodonAlphabet*>(alphabet)->getNucleicAlphabet())); cout << model->getNumberOfStates() << endl; MatrixTools::printForR(model->getGenerator(), "g"); DiscreteDistribution* rdist = new ConstantDistribution(1.0); HomogeneousSequenceSimulator simulator(model, rdist, tree); TotalSubstitutionRegister* totReg = new TotalSubstitutionRegister(model); DnDsSubstitutionRegister* dndsReg = new DnDsSubstitutionRegister(model); unsigned int n = 20000; vector< vector<double> > realMap(n); vector< vector< vector<double> > > realMapTotal(n); vector< vector< vector<double> > > realMapDnDs(n); VectorSiteContainer sites(tree->getLeavesNames(), alphabet); for (unsigned int i = 0; i < n; ++i) { ApplicationTools::displayGauge(i, n-1, '='); RASiteSimulationResult* result = simulator.dSimulateSite(); realMap[i].resize(ids.size()); realMapTotal[i].resize(ids.size()); realMapDnDs[i].resize(ids.size()); for (size_t j = 0; j < ids.size(); ++j) { realMap[i][j] = static_cast<double>(result->getSubstitutionCount(ids[j])); realMapTotal[i][j].resize(totReg->getNumberOfSubstitutionTypes()); realMapDnDs[i][j].resize(dndsReg->getNumberOfSubstitutionTypes()); result->getSubstitutionCount(ids[j], *totReg, realMapTotal[i][j]); result->getSubstitutionCount(ids[j], *dndsReg, realMapDnDs[i][j]); if (realMapTotal[i][j][0] != realMap[i][j]) { cerr << "Error, total substitution register provides wrong result." 
<< endl; return 1; } //if (abs(VectorTools::sum(realMapDetailed[i][j]) - realMap[i][j]) > 0.000001) { // cerr << "Error, detailed substitution register provides wrong result." << endl; // return 1; //} } auto_ptr<Site> site(result->getSite(*model)); site->setPosition(static_cast<int>(i)); sites.addSite(*site, false); delete result; } ApplicationTools::displayTaskDone(); //------------- //Now build the substitution vectors with the true model: //Fasta fasta; //fasta.write("Simulations.fasta", sites); DRHomogeneousTreeLikelihood drhtl(*tree, sites, model, rdist); drhtl.initialize(); cout << drhtl.getValue() << endl; SubstitutionCount* sCountAna = new LaplaceSubstitutionCount(model, 10); Matrix<double>* m = sCountAna->getAllNumbersOfSubstitutions(0.001,1); cout << "Analytical total count:" << endl; MatrixTools::print(*m); delete m; ProbabilisticSubstitutionMapping* probMapAna = SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountAna); SubstitutionCount* sCountTot = new NaiveSubstitutionCount(model, totReg); m = sCountTot->getAllNumbersOfSubstitutions(0.001,1); cout << "Simple total count:" << endl; MatrixTools::print(*m); delete m; ProbabilisticSubstitutionMapping* probMapTot = SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountTot); SubstitutionCount* sCountDnDs = new NaiveSubstitutionCount(model, dndsReg); m = sCountDnDs->getAllNumbersOfSubstitutions(0.001,1); cout << "Detailed count, type 1:" << endl; MatrixTools::print(*m); delete m; ProbabilisticSubstitutionMapping* probMapDnDs = SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountDnDs); SubstitutionCount* sCountUniTot = new UniformizationSubstitutionCount(model, totReg); m = sCountUniTot->getAllNumbersOfSubstitutions(0.001,1); cout << "Total count, uniformization method:" << endl; MatrixTools::print(*m); delete m; ProbabilisticSubstitutionMapping* probMapUniTot = SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountUniTot); 
SubstitutionCount* sCountUniDnDs = new UniformizationSubstitutionCount(model, dndsReg); m = sCountUniDnDs->getAllNumbersOfSubstitutions(0.001,2); cout << "Detailed count, uniformization method, type 2:" << endl; MatrixTools::print(*m); delete m; ProbabilisticSubstitutionMapping* probMapUniDnDs = SubstitutionMappingTools::computeSubstitutionVectors(drhtl, ids, *sCountUniDnDs); //Check per branch: /* //1. Total: for (unsigned int j = 0; j < ids.size(); ++j) { double totalReal = 0; double totalObs1 = 0; double totalObs2 = 0; double totalObs3 = 0; double totalObs4 = 0; double totalObs5 = 0; for (unsigned int i = 0; i < n; ++i) { totalReal += realMap[i][j]; totalObs1 += probMapAna->getNumberOfSubstitutions(ids[j], i, 0); totalObs2 += probMapTot->getNumberOfSubstitutions(ids[j], i, 0); //totalObs3 += VectorTools::sum(probMapDet->getNumberOfSubstitutions(ids[j], i)); totalObs4 += probMapDecTot->getNumberOfSubstitutions(ids[j], i, 0); //totalObs5 += VectorTools::sum(probMapDecDet->getNumberOfSubstitutions(ids[j], i)); } if (tree->isLeaf(ids[j])) cout << tree->getNodeName(ids[j]) << "\t"; cout << tree->getDistanceToFather(ids[j]) << "\t" << totalReal << "\t" << totalObs1 << "\t" << totalObs2 << "\t" << totalObs3 << "\t" << totalObs4 << "\t" << totalObs5 << endl; if (abs(totalReal - totalObs1) / totalReal > 0.1) return 1; if (abs(totalReal - totalObs2) / totalReal > 0.1) return 1; if (abs(totalReal - totalObs3) / totalReal > 0.1) return 1; if (abs(totalReal - totalObs4) / totalReal > 0.1) return 1; } //2. 
Detail: for (unsigned int j = 0; j < ids.size(); ++j) { vector<double> real(4, 0); vector<double> obs1(4, 0); vector<double> obs2(4, 0); for (unsigned int i = 0; i < n; ++i) { real += realMapDetailed[i][j]; //VectorTools::print(real); //vector<double> c = probMapDet->getNumberOfSubstitutions(ids[j], i); //VectorTools::print(c); //obs1 += probMapDet->getNumberOfSubstitutions(ids[j], i); //obs2 += probMapDecDet->getNumberOfSubstitutions(ids[j], i); } if (tree->isLeaf(ids[j])) cout << tree->getNodeName(ids[j]) << "\t"; cout << tree->getDistanceToFather(ids[j]) << "\t"; for (unsigned int t = 0; t < 4; ++t) { cout << obs1[t] << "/" << real[t] << "\t"; cout << obs2[t] << "/" << real[t] << "\t"; } cout << endl; //if (abs(totalReal - totalObs) / totalReal > 0.1) return 1; } */ //------------- delete tree; delete alphabet; delete model; delete rdist; delete sCountTot; delete sCountDnDs; delete probMapAna; delete probMapTot; delete probMapDnDs; delete probMapUniTot; delete probMapUniDnDs; //return (abs(obs - 0.001) < 0.001 ? 0 : 1); return 0; }
void TreeTemplateTools::getBestRootInSubtree_(TreeTemplate<Node>& tree, short criterion, Node* node, pair<Node*, map<string, double> >& bestRoot) { const vector<Node*> sons = node->getSons(); // copy tree.rootAt(node); // Try to place the root on each branch downward node for (vector<Node*>::const_iterator son = sons.begin(); son != sons.end(); ++son) { // Compute the moment of the subtree on son's side Moments_ son_moment = getSubtreeMoments_(*son); // Compute the moment of the subtree on node's side tree.rootAt(*son); Moments_ node_moment = getSubtreeMoments_(node); tree.rootAt(node); /* * Get the position of the root on this branch that * minimizes the root-to-leaves distances variance. * * This variance can be written in the form A x^2 + B x + C */ double min_criterion_value; double best_position; // 0 is toward the root, 1 is away from it const TreeTemplateTools::Moments_& m1 = node_moment; const TreeTemplateTools::Moments_& m2 = son_moment; const double d = (**son).getDistanceToFather(); const double n1 = m1.numberOfLeaves; const double n2 = m2.numberOfLeaves; double A = 0, B = 0, C = 0; if (criterion == MIDROOT_SUM_OF_SQUARES) { A = (n1 + n2) * d * d; B = 2 * d * (m1.sum - m2.sum) - 2 * n2 * d * d; C = m1.squaresSum + m2.squaresSum + 2 * m2.sum * d + n2 * d * d; } else if (criterion == MIDROOT_VARIANCE) { A = 4 * n1 * n2 * d * d; B = 4 * d * ( n2 * m1.sum - n1 * m2.sum - d * n1 * n2); C = (n1 + n2) * (m1.squaresSum + m2.squaresSum) + n1 * d * n2 * d + 2 * n1 * d * m2.sum - 2 * n2 * d * m1.sum - (m1.sum + m2.sum) * (m1.sum + m2.sum); } if (A < 1e-20) { min_criterion_value = numeric_limits<double>::max(); best_position = 0.5; } else { min_criterion_value = C - B * B / (4 * A); best_position = -B / (2 * A); if (best_position < 0) { best_position = 0; min_criterion_value = C; } else if (best_position > 1) { best_position = 1; min_criterion_value = A + B + C; } } // Is this branch is the best seen, update 'bestRoot' if (min_criterion_value < 
bestRoot.second["score"]) { bestRoot.first = *son; bestRoot.second["position"] = best_position; bestRoot.second["score"] = min_criterion_value; } // Recurse TreeTemplateTools::getBestRootInSubtree_(tree, criterion, *son, bestRoot); } }
/**
 * Builds a tree from this list of bipartitions.
 *
 * A working copy of the list is normalised (every bipartition flipped so
 * that its partition holds at most half of the elements, sorted by
 * partition size, redundant bipartitions removed). One node is then created
 * per bipartition: a named leaf for a partition of size 1, otherwise an
 * internal node that adopts every earlier, still fatherless node whose
 * bipartition is included in it. The remaining fatherless nodes become the
 * sons of the root.
 *
 * @return A new tree with reset node ids, to be deleted by the caller.
 * @throw Exception If the bipartitions are not all compatible.
 */
TreeTemplate<Node>* BipartitionList::toTree() const throw (Exception)
{
  /* check, copy and prepare bipartition list */

  if (!BipartitionList::areAllCompatible())
    throw Exception("Trying to build a tree from incompatible bipartitions");

  BipartitionList* sortedBipL = dynamic_cast<BipartitionList*>(clone());
  for (size_t i = 0; i < sortedBipL->getNumberOfBipartitions(); i++)
  {
    if (sortedBipL->getPartitionSize(i) > sortedBipL->getNumberOfElements() / 2)
      sortedBipL->flip(i);
  }
  sortedBipL->sortByPartitionSize();
  sortedBipL->removeRedundantBipartitions();
  vector<int*> sortedBitBipL = sortedBipL->getBitBipartitionList();

  // alive[i] is true as long as the node of bipartition i has no father.
  vector<bool> alive(sortedBipL->getNumberOfBipartitions(), true);
  vector<Node*> vecNd(sortedBipL->getNumberOfBipartitions() + 1);
  vector<Node*> sonNd;

  const size_t lword = static_cast<size_t>(BipartitionTools::LWORD);
  const size_t nbword = (elements_.size() + lword - 1) / lword;
  const size_t nbint = nbword * lword / (CHAR_BIT * sizeof(int));

  // One-word scratch buffer receiving the result of bitOr(). It lives on the
  // stack, so there is nothing to release on exit.
  int bip[1] = {0};

  /* main loop: create one node per bipartition */
  for (size_t i = 0; i < sortedBipL->getNumberOfBipartitions(); i++)
  {
    if (sortedBipL->getPartitionSize(i) == 1)
    { // terminal: name the leaf after the first element whose bit is set
      for (size_t j = 0; j < sortedBipL->getNumberOfElements(); j++)
      {
        if (BipartitionTools::testBit(sortedBitBipL[i], static_cast<int>(j)))
        {
          vecNd[i] = new Node(elements_[j]);
          break;
        }
      }
    }
    else
    { // internal: adopt every fatherless node whose bipartition is included in this one
      sonNd.clear();
      for (size_t j = 0; j < i; j++)
      {
        if (alive[j])
        {
          // j is included in i if (j OR i) == i for every word.
          size_t ii;
          for (ii = 0; ii < nbint; ii++)
          {
            BipartitionTools::bitOr(bip, sortedBitBipL[j] + ii, sortedBitBipL[i] + ii, 1);
            if (bip[0] != sortedBitBipL[i][ii])
              break;
          }
          if (ii == nbint)
          {
            sonNd.push_back(vecNd[j]);
            alive[j] = false;
          }
        }
      }
      vecNd[i] = new Node();
      for (size_t k = 0; k < sonNd.size(); k++)
      {
        vecNd[i]->addSon(sonNd[k]);
      }
    }
  }

  /* create last node, which joins alive bipartitions = fatherless nodes */
  Node* rootNd = new Node();
  for (size_t i = 0; i < sortedBipL->getNumberOfBipartitions(); i++)
  {
    if (alive[i])
      rootNd->addSon(vecNd[i]);
  }

  /* construct tree and return */
  TreeTemplate<Node>* tr = new TreeTemplate<Node>(rootNd);
  tr->resetNodesId();
  delete sortedBipL;
  return tr;
}
/**
 * Public entry point: forwards the root node of @p tree and @p mapping to
 * the private computeNormProperties_() overload.
 *
 * @param tree    Tree whose root node is handed to the helper.
 * @param mapping Substitution mapping forwarded unchanged.
 */
void ClusterTools::computeNormProperties(TreeTemplate<Node>& tree, const ProbabilisticSubstitutionMapping& mapping)
{
  Node* root = tree.getRootNode();

  // The helper takes a double argument (presumably an output value -- confirm
  // against computeNormProperties_). Whatever it leaves there is not used here.
  double unusedMin;
  computeNormProperties_(root, mapping, unusedMin);
}
/**
 * Compute, for every non-root node and every site, the reward accumulated on
 * the branch above that node, and store it in a new ProbabilisticRewardMapping.
 *
 * For each processed node the function multiplies together the conditional
 * likelihoods of everything around the father except the node itself (the
 * "constant part"), then combines it with the transition probabilities, the
 * node's own conditional likelihoods and the reward matrix returned by
 * reward.getAllRewards(d * rate), and finally divides by the site likelihood.
 *
 * @param drtl    Likelihood object; must already be initialized.
 * @param nodeIds If non-empty, only nodes whose id is in this list are
 *                processed; other rows of the mapping are left untouched.
 * @param reward  Reward object. Its substitution model is reset for each
 *                branch model and its address is passed to the mapping
 *                constructor.
 * @param verbose If true, a task title and a progress gauge are displayed.
 * @return A newly allocated mapping, returned as a raw pointer (the caller is
 *         expected to delete it).
 * @throws Exception if @p drtl is not initialized.
 */
ProbabilisticRewardMapping* RewardMappingTools::computeRewardVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  Reward& reward,
  bool verbose) throw (Exception)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("RewardMappingTools::computeRewardVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:
  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites         = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates        = sequences->getAlphabet()->getSize();
  size_t nbClasses       = rDist->getNumberOfCategories();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticRewardMapping object.
  // It is held by an auto_ptr until the very end so that it is released if
  // any of the calls below throws (it used to leak in that case).
  auto_ptr<ProbabilisticRewardMapping> rewards(new ProbabilisticRewardMapping(tree, &reward, nbSites));

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the reward for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    Vdouble rewardsForCurrentNode(nbDistinctSites);

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }

    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

      // Now iterate over all site partitions:
      auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                // Note the swapped indices: here the matrix is read as [y][x].
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      reward.setSubstitutionModel(bmd->getModel());

      // compute all nxy first:
      VVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        // getAllRewards() hands back a matrix we must free; the auto_ptr
        // does so at the end of this iteration, even on an exception.
        auto_ptr<Matrix<double> > nij(reward.getAllRewards(d * rc));
        nxy_c->resize(nbStates);
        for (size_t x = 0; x < nbStates; ++x)
        {
          Vdouble* nxy_c_x = &(*nxy_c)[x];
          nxy_c_x->resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            (*nxy_c_x)[y] = (*nij)(x, y);
          }
        }
      }

      // Now loop over sites:
      auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              // Now the vector computation:
              rewardsForCurrentNode[i] += likelihood_cxy * (*nxy_c)[x][y];
              //                          <------------>   <--------------->
              // Posterior probability         |                 |
              // for site i and rate class c * |                 |
              // likelihood for this site------+                 |
              //                                                 |
              // Reward function for site i and rate class c-----+
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      (*rewards)(l, i) = rewardsForCurrentNode[(*rootPatternLinks)[i]] / Lr[(*rootPatternLinks)[i]];
    }
  }
  if (verbose)
  {
    if (ApplicationTools::message)
      *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }

  // Everything succeeded: hand ownership over to the caller.
  return rewards.release();
}
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.1, B:0.2):0.3,C:0.1):0.2,(D:0.3,(E:0.2,F:0.05):0.1):0.1);"); vector<string> seqNames= tree->getLeavesNames(); vector<int> ids = tree->getNodesId(); //------------- const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET; FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet); SubstitutionModel* model = new T92(alphabet, 3.); std::vector<std::string> globalParameterNames; globalParameterNames.push_back("T92.kappa"); map<string, string> alias; SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames); //DiscreteDistribution* rdist = new ConstantDistribution(1.0, true); //Very difficult to optimize on small datasets: DiscreteDistribution* rdist = new GammaDiscreteRateDistribution(4, 1.0); size_t nsites = 1000; unsigned int nrep = 20; size_t nmodels = modelSet->getNumberOfModels(); vector<double> thetas(nmodels); vector<double> thetasEst1(nmodels); vector<double> thetasEst2(nmodels); for (size_t i = 0; i < nmodels; ++i) { double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005; cout << "Theta" << i << " set to " << theta << endl; modelSet->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta); thetas[i] = theta; } NonHomogeneousSequenceSimulator simulator(modelSet, rdist, tree); for (unsigned int j = 0; j < nrep; j++) { OutputStream* profiler = new StlOutputStream(new ofstream("profile.txt", ios::out)); OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out)); //Simulate data: auto_ptr<SiteContainer> sites(simulator.simulate(nsites)); //Now fit model: auto_ptr<SubstitutionModelSet> modelSet2(modelSet->clone()); auto_ptr<SubstitutionModelSet> modelSet3(modelSet->clone()); RNonHomogeneousTreeLikelihood tl(*tree, *sites.get(), modelSet2.get(), rdist, true, true, false); tl.initialize(); RNonHomogeneousTreeLikelihood tl2(*tree, 
*sites.get(), modelSet3.get(), rdist, true, true, true); tl2.initialize(); unsigned int c1 = OptimizationTools::optimizeNumericalParameters2( &tl, tl.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); unsigned int c2 = OptimizationTools::optimizeNumericalParameters2( &tl2, tl2.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); cout << c1 << ": " << tl.getValue() << "\t" << c2 << ": " << tl2.getValue() << endl; for (size_t i = 0; i < nmodels; ++i) { cout << modelSet2->getModel(i)->getParameter("theta").getValue() << "\t" << modelSet3->getModel(i)->getParameter("theta").getValue() << endl; //if (abs(modelSet2->getModel(i)->getParameter("theta").getValue() - modelSet3->getModel(i)->getParameter("theta").getValue()) > 0.1) // return 1; thetasEst1[i] += modelSet2->getModel(i)->getParameter("theta").getValue(); thetasEst2[i] += modelSet3->getModel(i)->getParameter("theta").getValue(); } } thetasEst1 /= static_cast<double>(nrep); thetasEst2 /= static_cast<double>(nrep); //Now compare estimated values to real ones: for (size_t i = 0; i < thetas.size(); ++i) { cout << thetas[i] << "\t" << thetasEst1[i] << "\t" << thetasEst2[i] << endl; double diff1 = abs(thetas[i] - thetasEst1[i]); double diff2 = abs(thetas[i] - thetasEst2[i]); if (diff1 > 0.2 || diff2 > 0.2) return 1; } //------------- delete tree; delete modelSet; delete rdist; return 0; }
int main() { //Get some leaf names: vector<string> leaves(100); for (size_t i = 0; i < leaves.size(); ++i) leaves[i] = "leaf" + TextTools::toString(i); for (unsigned int j = 0; j < 1000; ++j) { //Generate a random tree, without branch lengths: TreeTemplate<Node>* tree = TreeTemplateTools::getRandomTree(leaves, true); TreeTemplate<Node>* tree2 = new TreeTemplate<Node>(*tree); if (!tree->hasSameTopologyAs(*tree2)) return 1; //Error!!! tree2->getRootNode()->swap(0,1); //cout << "First test passed." << endl; if (!tree->hasSameTopologyAs(*tree2)) return 1; //Error!!! //cout << "Second test passed." << endl; //Convert tree to string and read it again: string newick = TreeTemplateTools::treeToParenthesis(*tree); TreeTemplate<Node>* tree3 = TreeTemplateTools::parenthesisToTree(newick, true, TreeTools::BOOTSTRAP, false, false); if (!tree->hasSameTopologyAs(*tree3)) return 1; //Error!!! //cout << "Third test passed." << endl; //------------- delete tree; delete tree2; delete tree3; } //Try to parse a string: TreeTemplate<Node>* tree4 = TreeTemplateTools::parenthesisToTree("((A:1,B:2):3,C:4);"); cout << TreeTemplateTools::treeToParenthesis(*tree4) << endl; delete tree4; TreeTemplate<Node>* tree5 = TreeTemplateTools::parenthesisToTree("((A:1,B:2):3,C:4):5;"); cout << TreeTemplateTools::treeToParenthesis(*tree5) << endl; delete tree5; Newick tReader; istringstream iss6("((A,B),C);"); TreeTemplate<Node>* tree6 = tReader.read(iss6); cout << TreeTemplateTools::treeToParenthesis(*tree6) << endl; delete tree6; istringstream iss7("((A:1,B:2):3,C:4):5;"); TreeTemplate<Node>* tree7 = tReader.read(iss7); cout << TreeTemplateTools::treeToParenthesis(*tree7) << endl; delete tree7; istringstream iss8("((A:1,B:2)80:3,C:4)2:5;"); TreeTemplate<Node>* tree8 = tReader.read(iss8); cout << TreeTemplateTools::treeToParenthesis(*tree8) << endl; vector<int> ids = tree8->getNodesId(); for (size_t i = 0; i < ids.size(); ++i) { cout << "Node " << ids[i] << ":" << endl; if 
(tree8->getNode(ids[i])->hasBranchProperty(TreeTools::BOOTSTRAP)) cout << "N: BOOTSTRAP=" << dynamic_cast<Number<double>*>(tree8->getNode(ids[i])->getBranchProperty(TreeTools::BOOTSTRAP))->getValue() << endl; vector<string> branchPpt = tree8->getNode(ids[i])->getBranchPropertyNames(); } delete tree8; istringstream iss9("((A,B)aa,C)2;"); tReader.enableExtendedBootstrapProperty("ESS"); TreeTemplate<Node>* tree9 = tReader.read(iss9); cout << TreeTemplateTools::treeToParenthesis(*tree9) << endl; ids = tree9->getNodesId(); for (size_t i = 0; i < ids.size(); ++i) { cout << "Node " << ids[i] << ":" << endl; vector<string> nodePpt = tree9->getNode(ids[i])->getNodePropertyNames(); for (size_t j = 0; j < nodePpt.size(); ++j) if (tree9->getNode(ids[i])->hasNodeProperty(nodePpt[j])) cout << "N: " << nodePpt[j] << "=" << dynamic_cast<BppString*>(tree9->getNode(ids[i])->getNodeProperty(nodePpt[j]))->toSTL() << endl; vector<string> branchPpt = tree9->getNode(ids[i])->getBranchPropertyNames(); for (size_t j = 0; j < branchPpt.size(); ++j) if (tree9->getNode(ids[i])->hasBranchProperty(branchPpt[j])) cout << "B: " << branchPpt[j] << "=" << dynamic_cast<BppString*>(tree9->getNode(ids[i])->getBranchProperty(branchPpt[j]))->toSTL() << endl; } delete tree9; //Test file parsing: TreeTemplate<Node>* tree10 = TreeTemplateTools::getRandomTree(leaves, true); Newick tWriter; tWriter.write(*tree10, "tmp_tree.dnd"); Tree* test = tReader.read("tmp_tree.dnd"); if (!TreeTools::haveSameTopology(*tree10, *test)) return 1; cout << "Newick I/O ok." << endl; //Multiple trees: vector<Tree *> trees; for (unsigned int i = 0; i < 100; ++i) { trees.push_back(TreeTemplateTools::getRandomTree(leaves, true)); } tWriter.write(trees, "tmp_trees.dnd"); vector<Tree *> trees2; tReader.read("tmp_trees.dnd", trees2); for (unsigned int i = 0; i < 100; ++i) { if (!TreeTools::haveSameTopology(*trees[i], *trees2[i])) { cerr << "Tree " << i << " failed to write and/or read!" 
<< endl; return 1; } } cout << "Newick multiple I/O ok." << endl; for (unsigned int i = 0; i < 100; ++i) { delete trees[i]; delete trees2[i]; } //Try newick read on non-file: cout << "Testing parsing a directory..." << endl; try { Tree* tmp = tReader.read("test/"); cerr << "Arg, reading on directory should fail!" << endl; if (tmp) { cerr << "Output of read on directory is not NULL!" << endl; } return 1; } catch (Exception& ex) { cout << "Ok, reading on directory throws exception!" << endl; } cout << "Testing parsing a directory for multiple trees..." << endl; try { vector<Tree*> treesTmp; tReader.read("test/", treesTmp); if (treesTmp.size() != 0) { cerr << "Output of multiple read on directory is not 0!" << endl; return 1; } else { cout << "Ok, reading on directory returns a vector of size 0!" << endl; } } catch(Exception& ex) { cout << "Error, no exception should be thrown here!" << endl; } //Now try some weird cases, to see if we handle them properly: //single node tree: cout << "Testing a tree with a node of degree 2:" << endl; TreeTemplate<Node>* weird1 = TreeTemplateTools::parenthesisToTree("((A:1):2.0,B);"); if (weird1->getNodes().size() != 4) { cout << "Error, tree has " << weird1->getNodes().size() << " node(s) instead of 4!" << endl; VectorTools::print(weird1->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird1) << endl; delete weird1; cout << "Testing a tree with a node of degree 2, without branch length:" << endl; TreeTemplate<Node>* weird2 = TreeTemplateTools::parenthesisToTree("((A),B);"); if (weird2->getNodes().size() != 4) { cout << "Error, tree has " << weird2->getNodes().size() << " node(s) instead of 4!" 
<< endl; VectorTools::print(weird2->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird2) << endl; delete weird2; cout << "Testing a tree with several single nodes:" << endl; TreeTemplate<Node>* weird3 = TreeTemplateTools::parenthesisToTree("((((((A)):1)):3),B);"); if (weird3->getNodes().size() != 8) { cout << "Error, tree has " << weird3->getNodes().size() << " node(s) instead of 8!" << endl; VectorTools::print(weird3->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird3) << endl; delete weird3; cout << "Testing a tree with a single leaf:" << endl; TreeTemplate<Node>* weird4 = TreeTemplateTools::parenthesisToTree("(A:1.0);"); if (weird4->getNodes().size() != 2) { cout << "Error, tree has " << weird4->getNodes().size() << " node(s) instead of 2!" << endl; VectorTools::print(weird4->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird4) << endl; delete weird4; cout << "Testing a tree with a single node:" << endl; TreeTemplate<Node>* weird5 = TreeTemplateTools::parenthesisToTree("((A:1.0));"); if (weird5->getNodes().size() != 3) { cout << "Error, tree has " << weird5->getNodes().size() << " node(s) instead of 3!" << endl; VectorTools::print(weird5->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird5) << endl; delete weird5; cout << "Testing a tree with a single node and branch lengths:" << endl; TreeTemplate<Node>* weird6 = TreeTemplateTools::parenthesisToTree("((A:1.0):2.0);"); if (weird6->getNodes().size() != 3) { cout << "Error, tree has " << weird6->getNodes().size() << " node(s) instead of 3!" << endl; VectorTools::print(weird6->getLeavesNames()); return 1; } cout << TreeTemplateTools::treeToParenthesis(*weird6) << endl; delete weird6; return 0; }
int main() { TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("((A:0.01, B:0.02):0.03,C:0.01,D:0.1);"); vector<string> seqNames= tree->getLeavesNames(); vector<int> ids = tree->getNodesId(); //------------- NucleicAlphabet* alphabet = new DNA(); SubstitutionModel* model = new T92(alphabet, 3.); FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet); std::vector<std::string> globalParameterNames; globalParameterNames.push_back("T92.kappa"); map<string, string> alias; SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames); DiscreteDistribution* rdist = new ConstantRateDistribution(); vector<double> thetas; for (unsigned int i = 0; i < modelSet->getNumberOfModels(); ++i) { double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005; cout << "Theta" << i << " set to " << theta << endl; modelSet->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta); thetas.push_back(theta); } NonHomogeneousSequenceSimulator simulator(modelSet, rdist, tree); unsigned int n = 100000; OutputStream* profiler = new StlOutputStream(new ofstream("profile.txt", ios::out)); OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out)); //Check fast simulation first: cout << "Fast check:" << endl; //Generate data set: VectorSiteContainer sites(seqNames, alphabet); for (unsigned int i = 0; i < n; ++i) { auto_ptr<Site> site(simulator.simulateSite()); site->setPosition(static_cast<int>(i)); sites.addSite(*site, false); } //Now fit model: SubstitutionModelSet* modelSet2 = modelSet->clone(); RNonHomogeneousTreeLikelihood tl(*tree, sites, modelSet2, rdist); tl.initialize(); OptimizationTools::optimizeNumericalParameters2( &tl, tl.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); //Now compare estimated values to real ones: for (size_t i = 0; i < thetas.size(); ++i) { cout << 
thetas[i] << "\t" << modelSet2->getModel(i)->getParameter("theta").getValue() << endl; double diff = abs(thetas[i] - modelSet2->getModel(i)->getParameter("theta").getValue()); if (diff > 0.1) return 1; } delete modelSet2; //Now try detailed simulations: cout << "Detailed check:" << endl; //Generate data set: VectorSiteContainer sites2(seqNames, alphabet); for (unsigned int i = 0; i < n; ++i) { RASiteSimulationResult* result = simulator.dSimulateSite(); auto_ptr<Site> site(result->getSite(*simulator.getSubstitutionModelSet()->getModel(0))); site->setPosition(static_cast<int>(i)); sites2.addSite(*site, false); delete result; } //Now fit model: SubstitutionModelSet* modelSet3 = modelSet->clone(); RNonHomogeneousTreeLikelihood tl2(*tree, sites2, modelSet3, rdist); tl2.initialize(); OptimizationTools::optimizeNumericalParameters2( &tl2, tl2.getParameters(), 0, 0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON); //Now compare estimated values to real ones: for (size_t i = 0; i < thetas.size(); ++i) { cout << thetas[i] << "\t" << modelSet3->getModel(i)->getParameter("theta").getValue() << endl; double diff = abs(thetas[i] - modelSet3->getModel(i)->getParameter("theta").getValue()); if (diff > 0.1) return 1; } delete modelSet3; //------------- delete tree; delete alphabet; delete modelSet; delete rdist; return 0; }
/**
 * Convenience overload: runs the node-based getSubtreesWithSize() overload
 * from the root of @p tree and returns what it collected.
 *
 * @param tree Tree whose root node is the starting point.
 * @param size Size value forwarded unchanged to the node-based overload.
 * @return The vector filled by the node-based overload.
 */
vector<const Node*> ClusterTools::getSubtreesWithSize(const TreeTemplate<Node>& tree, size_t size)
{
  const Node* root = tree.getRootNode();

  vector<const Node*> collected;
  getSubtreesWithSize(root, size, collected);
  return collected;
}