/**
 * Computes the log-likelihood of a tree on an RNA alignment read from a Fasta file.
 *
 * The alignment is read with an RNA alphabet, gap-only sites are removed and the
 * remaining gaps are turned into unknown characters. The likelihood is evaluated
 * with a GTR model (frequencies set from the data) and a discrete gamma rate
 * distribution built as GammaDiscreteDistribution(8, 1, 1).
 *
 * @param tree          Tree in parenthesis format, parsed with
 *                      TreeTemplateTools::parenthesisToTree(tree, false, "ID").
 * @param aln_filename  Path of the Fasta alignment.
 * @param optimize_bls  If true, branch-length and rate-distribution parameters are
 *                      optimised before the likelihood is read.
 * @param tolerance     Tolerance forwarded to OptimizationTools::optimizeNumericalParameters.
 * @return first:  parenthesis string of the tree held by the likelihood object
 *                 (after optimisation, if requested);
 *         second: the negated value of the likelihood function, i.e. the log-likelihood.
 */
pair<string,scalar_type> tree_LL_nucl(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  // Alphabet used to parse the alignment. (A ProteicAlphabet was used here at some
  // point; NexusIOSequence and Phylip readers were also tried instead of Fasta.)
  const Alphabet* alphabet = new RNA();

  Fasta reader;
  OrderedSequenceContainer* alignment = reader.read(aln_filename, alphabet);
  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::removeGapOnlySites(*sites);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

  SubstitutionModel* model = new GTR(&AlphabetTools::RNA_ALPHABET);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(8, 1, 1);

  // The tree and the sites are handed over by reference; per the bpp-phyl
  // documentation the likelihood object works on its own copies, so the
  // originals stay owned by this function and are released below.
  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  if (optimize_bls)
  {
    // Optimise branch lengths together with the rate-distribution parameters.
    ParameterList parameters;
    parameters.addParameters(tl1->getBranchLengthsParameters());
    parameters.addParameters(tl1->getRateDistributionParameters());

    // NOTE(review): the positional arguments appear to be listener, nstep,
    // tolerance, max evaluations, message handler, profiler, reparametrization,
    // verbosity -- confirm against the installed OptimizationTools signature.
    OptimizationTools::optimizeNumericalParameters(
      tl1,
      parameters,
      0,
      1,
      tolerance,
      1000,
      0,
      0,
      false,
      0,
      OptimizationTools::OPTIMIZATION_NEWTON,
      OptimizationTools::OPTIMIZATION_BFGS);   // OPTIMIZATION_BRENT was the alternative tried here
  }

  // getValue() is negated to obtain the log-likelihood.
  pair<string,scalar_type> return_pair;
  return_pair.second = - tl1->getValue();
  return_pair.first = TreeTemplateTools::treeToParenthesis(tl1->getTree());

  // Tear down in reverse order of dependency: the likelihood object first,
  // then what it was built from, and the alphabet (referenced by the
  // containers) last. `ttree1` and `alignment` were previously leaked.
  delete tl1;
  delete rDist;
  delete model;
  delete ttree1;
  delete sites;
  delete alignment;
  delete alphabet;

  return return_pair;
}
/**
 * Computes the log-likelihood of a tree on a protein alignment read from a Fasta file.
 *
 * The alignment is read with a ProteicAlphabet and its gaps are turned into
 * unknown characters (gap-only sites are NOT removed here, unlike tree_LL_nucl).
 * The likelihood is evaluated with an LG08 model using a FullProteinFrequenciesSet
 * (frequencies set from the data) and a discrete gamma rate distribution built as
 * GammaDiscreteDistribution(4, 1, 1).
 *
 * @param tree          Tree in parenthesis format, parsed with
 *                      TreeTemplateTools::parenthesisToTree(tree, false, "ID").
 * @param aln_filename  Path of the Fasta alignment.
 * @param optimize_bls  If true, branch-length and rate-distribution parameters are
 *                      optimised before the likelihood is read.
 * @param tolerance     Tolerance forwarded to OptimizationTools::optimizeNumericalParameters.
 * @return The negated value of the likelihood function, i.e. the log-likelihood.
 */
scalar_type tree_LL(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  const Alphabet* alphabet = new ProteicAlphabet();

  // A Phylip reader was tried here as an alternative to Fasta.
  Fasta reader;
  OrderedSequenceContainer* alignment = reader.read(aln_filename, alphabet);
  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  // Reading the tree through a Newick object was the alternative tried here.
  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

  SubstitutionModel* model =
    new LG08(&AlphabetTools::PROTEIN_ALPHABET,
             new FullProteinFrequenciesSet(&AlphabetTools::PROTEIN_ALPHABET),
             true);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(4, 1, 1);

  // The tree and the sites are handed over by reference; per the bpp-phyl
  // documentation the likelihood object works on its own copies, so the
  // originals stay owned by this function and are released below.
  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  // A commented-out variant driving a PseudoNewtonOptimizer by hand (fixed
  // tolerance 0.01, at most 1000 evaluations) used to sit here; it was dead
  // code and has been dropped in favour of the OptimizationTools call below.
  if (optimize_bls)
  {
    // Optimise branch lengths together with the rate-distribution parameters.
    ParameterList parameters;
    parameters.addParameters(tl1->getBranchLengthsParameters());
    parameters.addParameters(tl1->getRateDistributionParameters());

    // NOTE(review): the positional arguments appear to be listener, nstep,
    // tolerance, max evaluations, message handler, profiler, reparametrization,
    // verbosity -- confirm against the installed OptimizationTools signature.
    OptimizationTools::optimizeNumericalParameters(
      tl1,
      parameters,
      0,
      1,
      tolerance,
      1000,
      0,
      0,
      false,
      0,
      OptimizationTools::OPTIMIZATION_NEWTON,
      OptimizationTools::OPTIMIZATION_BFGS);   // OPTIMIZATION_BRENT was the alternative tried here
  }

  // getValue() is negated to obtain the log-likelihood.
  scalar_type LL = - tl1->getValue();

  // Tear down in reverse order of dependency: the likelihood object first,
  // then what it was built from, and the alphabet (referenced by the
  // containers) last. `ttree1` and `alignment` were previously leaked.
  delete tl1;
  delete rDist;
  delete model;
  delete ttree1;
  delete sites;
  delete alignment;
  delete alphabet;

  return LL;
}
int main(int args, char ** argv) { cout << "******************************************************************" << endl; cout << "* Bio++ Distance Methods, version 2.2.0 *" << endl; cout << "* Author: J. Dutheil Created 05/05/07 *" << endl; cout << "* Last Modif. 04/02/15 *" << endl; cout << "******************************************************************" << endl; cout << endl; if(args == 1) { help(); return 0; } try { BppApplication bppdist(args, argv, "BppDist"); bppdist.startTimer(); Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false); auto_ptr<GeneticCode> gCode; CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet); if (codonAlphabet) { string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true); ApplicationTools::displayResult("Genetic Code", codeDesc); gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc)); } VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams()); VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(* allSites, bppdist.getParams()); delete allSites; ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences())); ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites())); SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams()); DiscreteDistribution* rDist = 0; if (model->getNumberOfStates() > model->getAlphabet()->getSize()) { //Markov-modulated Markov model! 
rDist = new ConstantRateDistribution(); } else { rDist = PhylogeneticsApplicationTools::getRateDistribution(bppdist.getParams()); } DistanceEstimation distEstimation(model, rDist, sites, 1, false); string method = ApplicationTools::getStringParameter("method", bppdist.getParams(), "nj"); ApplicationTools::displayResult("Tree reconstruction method", method); TreeTemplate<Node>* tree; AgglomerativeDistanceMethod* distMethod = 0; if(method == "wpgma") { PGMA* wpgma = new PGMA(true); distMethod = wpgma; } else if(method == "upgma") { PGMA* upgma = new PGMA(false); distMethod = upgma; } else if(method == "nj") { NeighborJoining* nj = new NeighborJoining(); nj->outputPositiveLengths(true); distMethod = nj; } else if(method == "bionj") { BioNJ* bionj = new BioNJ(); bionj->outputPositiveLengths(true); distMethod = bionj; } else throw Exception("Unknown tree reconstruction method."); string type = ApplicationTools::getStringParameter("optimization.method", bppdist.getParams(), "init"); ApplicationTools::displayResult("Model parameters estimation method", type); if (type == "init") type = OptimizationTools::DISTANCEMETHOD_INIT; else if (type == "pairwise") type = OptimizationTools::DISTANCEMETHOD_PAIRWISE; else if (type == "iterations") type = OptimizationTools::DISTANCEMETHOD_ITERATIONS; else throw Exception("Unknown parameter estimation procedure '" + type + "'."); unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", bppdist.getParams(), 2); string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false); OutputStream* messenger = (mhPath == "none") ? 0 : (mhPath == "std") ? ApplicationTools::message : new StlOutputStream(new ofstream(mhPath.c_str(), ios::out)); ApplicationTools::displayResult("Message handler", mhPath); string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false); OutputStream* profiler = (prPath == "none") ? 
0 : (prPath == "std") ? ApplicationTools::message : new StlOutputStream(new ofstream(prPath.c_str(), ios::out)); if(profiler) profiler->setPrecision(20); ApplicationTools::displayResult("Profiler", prPath); // Should I ignore some parameters? ParameterList allParameters = model->getParameters(); allParameters.addParameters(rDist->getParameters()); ParameterList parametersToIgnore; string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", bppdist.getParams(), "", "", true, false); bool ignoreBrLen = false; StringTokenizer st(paramListDesc, ","); while (st.hasMoreToken()) { try { string param = st.nextToken(); if (param == "BrLen") ignoreBrLen = true; else { if (allParameters.hasParameter(param)) { Parameter* p = &allParameters.getParameter(param); parametersToIgnore.addParameter(*p); } else ApplicationTools::displayWarning("Parameter '" + param + "' not found."); } } catch (ParameterNotFoundException& pnfe) { ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!"); } } unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", bppdist.getParams(), 1000000); ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax)); double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", bppdist.getParams(), .000001); ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance)); //Here it is: ofstream warn("warnings", ios::out); ApplicationTools::warning = new StlOutputStreamWrapper(&warn); tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose); warn.close(); delete ApplicationTools::warning; ApplicationTools::warning = ApplicationTools::message; string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false); if 
(matrixPath != "none") { ApplicationTools::displayResult("Output matrix file", matrixPath); string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false); string format = ""; bool extended = false; std::map<std::string, std::string> unparsedArguments_; KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_); if (unparsedArguments_.find("type") != unparsedArguments_.end()) { if (unparsedArguments_["type"] == "extended") { extended = true; } else if (unparsedArguments_["type"] == "classic") extended = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " + "Default used instead: not extended."); } else ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended."); ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended); odm->write(*distEstimation.getMatrix(), matrixPath, true); delete odm; } PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams()); //Output some parameters: if (type == OptimizationTools::DISTANCEMETHOD_ITERATIONS) { // Write parameters to screen: ParameterList parameters = model->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } parameters = rDist->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } // Write parameters to file: string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppdist.getParams(), false, false); if (parametersFile != "none") { ofstream out(parametersFile.c_str(), ios::out); parameters = model->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { out << parameters[i].getName() 
<< " = " << parameters[i].getValue() << endl; } parameters = rDist->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { out << parameters[i].getName() << " = " << parameters[i].getValue() << endl; } out.close(); } } //Bootstrap: unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppdist.getParams(), 0); if(nbBS > 0) { ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS)); bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppdist.getParams(), true); ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no")); if(approx) { type = OptimizationTools::DISTANCEMETHOD_INIT; parametersToIgnore = allParameters; ignoreBrLen = true; } bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppdist.getParams(), false, "", true, false); string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bppdist.getParams(), false, false); ofstream *out = NULL; if(bsTreesPath != "none") { ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath); out = new ofstream(bsTreesPath.c_str(), ios::out); } Newick newick; vector<Tree *> bsTrees(nbBS); ApplicationTools::displayTask("Bootstrapping", true); for(unsigned int i = 0; i < nbBS; i++) { ApplicationTools::displayGauge(i, nbBS-1, '='); VectorSiteContainer * sample = SiteContainerTools::bootstrapSites(*sites); if(approx) model->setFreqFromData(*sample); distEstimation.setData(sample); bsTrees[i] = OptimizationTools::buildDistanceTree( distEstimation, *distMethod, parametersToIgnore, ignoreBrLen, type, tolerance, nbEvalMax, NULL, NULL, (bootstrapVerbose ? 
1 : 0) ); if(out && i == 0) newick.write(*bsTrees[i], bsTreesPath, true); if(out && i > 0) newick.write(*bsTrees[i], bsTreesPath, false); delete sample; } if(out) out->close(); if(out) delete out; ApplicationTools::displayTaskDone(); ApplicationTools::displayTask("Compute bootstrap values"); TreeTools::computeBootstrapValues(*tree, bsTrees); ApplicationTools::displayTaskDone(); for(unsigned int i = 0; i < nbBS; i++) delete bsTrees[i]; //Write resulting tree: PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams()); } delete alphabet; delete sites; delete distMethod; delete tree; bppdist.done();} catch(exception & e) { cout << e.what() << endl; return 1; } return 0; }