size_t Alignment::get_number_of_free_parameters() { if (!likelihood) throw Exception("Likelihood model not initialised"); ParameterList pl = likelihood->getBranchLengthsParameters(); pl.addParameters(model->getIndependentParameters()); if (rates->getName() == "Gamma") pl.addParameters(rates->getIndependentParameters()); return pl.size(); }
void Alignment::optimise_topology(bool fix_model_params) { if (!likelihood) { cerr << "Likelihood calculator not set - call initialise_likelihood" << endl; throw Exception("Uninitialised likelihood error"); } ParameterList pl = likelihood->getBranchLengthsParameters(); if (!fix_model_params) { pl.addParameters(model->getIndependentParameters()); if (rates->getName() == "Gamma") pl.addParameters(rates->getIndependentParameters()); } likelihood = make_shared<NNIHomogeneousTreeLikelihood>(*OptimizationTools::optimizeTreeNNI2(likelihood.get(), pl, true, 0.001, 0.1, 1000000, 1, NULL, NULL, false, 10)); }
/**
 * Compute a distance matrix with the supplied estimation method.
 *
 * @param estimationMethod  Distance estimation object; its additional
 *                          parameters are reset and its verbosity set.
 * @param parametersToIgnore Parameters removed from the set of additional
 *                          parameters in pairwise mode.
 * @param param             Must be DISTANCEMETHOD_INIT or
 *                          DISTANCEMETHOD_PAIRWISE. In pairwise mode the
 *                          independent parameters of the substitution model
 *                          and rate distribution (minus the ignored ones)
 *                          are registered as additional parameters.
 * @param verbose           Verbosity; task messages are shown when > 0.
 * @return The matrix obtained from estimationMethod.getMatrix(); the
 *         pointer is released to the caller.
 * @throws Exception if `param` is neither of the two accepted values.
 */
DistanceMatrix* OptimizationTools::estimateDistanceMatrix(
  DistanceEstimation& estimationMethod,
  const ParameterList& parametersToIgnore,
  const std::string& param,
  unsigned int verbose) throw (Exception)
{
  const bool pairwise = (param == DISTANCEMETHOD_PAIRWISE);
  if (!pairwise && param != DISTANCEMETHOD_INIT)
    throw Exception("OptimizationTools::estimateDistanceMatrix. Invalid option param=" + param + ".");

  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);

  if (pairwise)
  {
    ParameterList additional = estimationMethod.getSubstitutionModel().getIndependentParameters();
    additional.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    additional.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(additional);
  }

  // Compute the matrix:
  const bool showProgress = (verbose > 0);
  if (showProgress)
    ApplicationTools::displayTask("Estimating distance matrix", true);
  estimationMethod.computeMatrix();

  // Held in an auto_ptr so the matrix is freed should the display call throw.
  auto_ptr<DistanceMatrix> result(estimationMethod.getMatrix());
  if (showProgress)
    ApplicationTools::displayTaskDone();

  return result.release();
}
/**
 * Build the list of parameters reported as "non derivable": the
 * substitution model parameters followed by the rate distribution
 * parameters.
 *
 * @return A new list holding both parameter sets.
 * @throws Exception if the object has not been initialized.
 */
ParameterList AbstractDiscreteRatesAcrossSitesTreeLikelihood::getNonDerivableParameters() const
{
  if (_initialized)
  {
    ParameterList nonDerivable = getSubstitutionModelParameters();
    nonDerivable.addParameters(getRateDistributionParameters());
    return nonDerivable;
  }

  throw Exception("AbstractDiscreteRatesAcrossSitesTreeLikelihood::getNonDerivableParameters(). Object is not initialized.");
}
void Alignment::_print_params() { if (likelihood) { ParameterList pl = likelihood->getParameters(); pl.printParameters(cout); } else if (rates && model) { ParameterList pl = rates->getIndependentParameters(); pl.addParameters(model->getIndependentParameters()); pl.printParameters(cout); cout << "----------" << endl; } }
void Alignment::optimise_parameters(bool fix_branch_lengths) { if (!likelihood) { cerr << "Likelihood calculator not set - call initialise_likelihood" << endl; throw Exception("Uninitialised likelihood error"); } ParameterList pl; if (fix_branch_lengths) { pl = likelihood->getSubstitutionModelParameters(); pl.addParameters(likelihood->getRateDistributionParameters()); } else { pl = likelihood->getParameters(); } OptimizationTools::optimizeNumericalParameters2(likelihood.get(), pl, 0, 0.001, 1000000, NULL, NULL, false, false, 10); }
/**
 * Build a tree from a distance matrix, optionally iterating between
 * distance estimation and maximum-likelihood parameter estimation.
 *
 * Procedure:
 *   1. Estimate the distance matrix with `estimationMethod`.
 *   2. Build a tree from it with `reconstructionMethod` (or build the only
 *      possible two-leaf tree directly when the matrix has two entries).
 *   3. Unless `param` is DISTANCEMETHOD_ITERATIONS, stop here.
 *   4. Otherwise optimise the parameters of a likelihood object built on
 *      the tree and loop back to step 1 until the topology no longer
 *      differs from the one of the previous iteration (Robinson-Foulds
 *      distance of 0).
 *
 * @param estimationMethod     Distance estimation object (its additional
 *                             parameters are reset and verbosity set).
 * @param reconstructionMethod Agglomerative method used to build the tree.
 * @param parametersToIgnore   Parameters excluded from estimation.
 * @param optimizeBrLen        When false, branch-length parameters are
 *                             removed from the optimised set.
 * @param param                DISTANCEMETHOD_INIT, DISTANCEMETHOD_PAIRWISE
 *                             or DISTANCEMETHOD_ITERATIONS.
 * @param tolerance            Tolerance passed to the numerical optimiser.
 * @param tlEvalMax            Maximum number of likelihood evaluations.
 * @param profiler             Profiler stream (may be NULL).
 * @param messenger            Message stream (may be NULL).
 * @param verbose              Verbosity level; progress is shown when > 0.
 * @return The final tree; ownership goes to the caller.
 */
TreeTemplate<Node>* OptimizationTools::buildDistanceTree(
  DistanceEstimation& estimationMethod,
  AgglomerativeDistanceMethod& reconstructionMethod,
  const ParameterList& parametersToIgnore,
  bool optimizeBrLen,
  const std::string& param,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* profiler,
  OutputStream* messenger,
  unsigned int verbose) throw (Exception)
{
  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);
  if (param == DISTANCEMETHOD_PAIRWISE)
  {
    ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters();
    tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    tmp.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(tmp);
  }

  TreeTemplate<Node>* tree = NULL;
  bool test = true;
  while (test)
  {
    // Compute matrix:
    if (verbose > 0)
      ApplicationTools::displayTask("Estimating distance matrix", true);
    estimationMethod.computeMatrix();
    // The matrix is owned here; the auto_ptr frees it on every exit path
    // (including the two-sequence special case and exceptions).
    auto_ptr<DistanceMatrix> matrix(estimationMethod.getMatrix());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    // Compute tree:
    if (matrix->size() == 2)
    {
      // Special case, there is only one possible tree. Each leaf gets half
      // of the pairwise distance, i.e. the off-diagonal entry (0,1).
      const double halfDistance = (*matrix)(0, 1) / 2.;
      Node* n1 = new Node(0);
      Node* n2 = new Node(1, matrix->getName(0));
      n2->setDistanceToFather(halfDistance);
      Node* n3 = new Node(2, matrix->getName(1));
      n3->setDistanceToFather(halfDistance);
      n1->addSon(n2);
      n1->addSon(n3);
      delete tree; // No-op on the first pass; avoids a leak otherwise.
      tree = new TreeTemplate<Node>(n1);
      break;
    }

    if (verbose > 0)
      ApplicationTools::displayTask("Building tree");
    reconstructionMethod.setDistanceMatrix(*matrix);
    reconstructionMethod.computeTree();
    matrix.reset(); // Not needed any more; free it before the costly ML step.

    // The tree of the previous iteration (if any) is released at the end of
    // this iteration, whatever the verbosity.
    auto_ptr< TreeTemplate<Node> > previousTree(tree);
    tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    if (previousTree.get())
    {
      // Convergence check: must run independently of verbosity, otherwise
      // a silent run would never leave the loop.
      int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false);
      if (verbose > 0)
        ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf));
      // Keep iterating only while the topology is still changing.
      test = (rf != 0);
    }

    if (param != DISTANCEMETHOD_ITERATIONS)
      break; // Ends here.

    // Now, re-estimate parameters:
    // NOTE(review): the likelihood works on clones of the model and rate
    // distribution, and nothing visible here writes the optimised values
    // back into estimationMethod - confirm that the next matrix estimation
    // actually sees the new estimates.
    auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone());
    auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone());
    DRHomogeneousTreeLikelihood tl(*tree, *estimationMethod.getData(), model.get(), rdist.get(), true, verbose > 1);
    tl.initialize();
    ParameterList parameters = tl.getParameters();
    if (!optimizeBrLen)
    {
      vector<string> vs = tl.getBranchLengthsParameters().getParameterNames();
      parameters.deleteParameters(vs);
    }
    parameters.deleteParameters(parametersToIgnore.getParameterNames());
    optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0);
    if (verbose > 0)
    {
      ParameterList tmp = tl.getSubstitutionModelParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
      tmp = tl.getRateDistributionParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
    }
  }
  return tree;
}
unsigned int OptimizationTools::optimizeNumericalParameters2( DiscreteRatesAcrossSitesTreeLikelihood* tl, const ParameterList& parameters, OptimizationListener* listener, double tolerance, unsigned int tlEvalMax, OutputStream* messageHandler, OutputStream* profiler, bool reparametrization, bool useClock, unsigned int verbose, const std::string& optMethodDeriv) throw (Exception) { DerivableSecondOrder* f = tl; ParameterList pl = parameters; // Shall we use a molecular clock constraint on branch lengths? auto_ptr<GlobalClockTreeLikelihoodFunctionWrapper> fclock; if (useClock) { fclock.reset(new GlobalClockTreeLikelihoodFunctionWrapper(tl)); f = fclock.get(); if (verbose > 0) ApplicationTools::displayResult("Log-likelihood after adding clock", -tl->getLogLikelihood()); // Reset parameters to use new branch lengths. WARNING! 'old' branch parameters do not exist anymore and have been replaced by heights pl = fclock->getParameters().getCommonParametersWith(parameters); pl.addParameters(fclock->getHeightParameters()); } // Shall we reparametrize the function to remove constraints? 
auto_ptr<DerivableSecondOrder> frep; if (reparametrization) { frep.reset(new ReparametrizationDerivableSecondOrderWrapper(f, pl)); f = frep.get(); // Reset parameters to remove constraints: pl = f->getParameters().subList(pl.getParameterNames()); } auto_ptr<AbstractNumericalDerivative> fnum; // Build optimizer: auto_ptr<Optimizer> optimizer; if (optMethodDeriv == OPTIMIZATION_GRADIENT) { fnum.reset(new TwoPointsNumericalDerivative(f)); fnum->setInterval(0.0000001); optimizer.reset(new ConjugateGradientMultiDimensions(fnum.get())); } else if (optMethodDeriv == OPTIMIZATION_NEWTON) { fnum.reset(new ThreePointsNumericalDerivative(f)); fnum->setInterval(0.0001); optimizer.reset(new PseudoNewtonOptimizer(fnum.get())); } else if (optMethodDeriv == OPTIMIZATION_BFGS) { fnum.reset(new TwoPointsNumericalDerivative(f)); fnum->setInterval(0.0001); optimizer.reset(new BfgsMultiDimensions(fnum.get())); } else throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optMethodDeriv); // Numerical derivatives: ParameterList tmp = tl->getNonDerivableParameters(); if (useClock) tmp.addParameters(fclock->getHeightParameters()); fnum->setParametersToDerivate(tmp.getParameterNames()); optimizer->setVerbose(verbose); optimizer->setProfiler(profiler); optimizer->setMessageHandler(messageHandler); optimizer->setMaximumNumberOfEvaluations(tlEvalMax); optimizer->getStopCondition()->setTolerance(tolerance); // Optimize TreeLikelihood function: optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO); NaNListener* nanListener = new NaNListener(optimizer.get(), tl); optimizer->addOptimizationListener(nanListener); if (listener) optimizer->addOptimizationListener(listener); optimizer->init(pl); optimizer->optimize(); if (verbose > 0) ApplicationTools::displayMessage("\n"); // We're done. return optimizer->getNumberOfEvaluations(); }
int main(int args, char ** argv) { cout << "******************************************************************" << endl; cout << "* Bio++ Distance Methods, version 2.2.0 *" << endl; cout << "* Author: J. Dutheil Created 05/05/07 *" << endl; cout << "* Last Modif. 04/02/15 *" << endl; cout << "******************************************************************" << endl; cout << endl; if(args == 1) { help(); return 0; } try { BppApplication bppdist(args, argv, "BppDist"); bppdist.startTimer(); Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false); auto_ptr<GeneticCode> gCode; CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet); if (codonAlphabet) { string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true); ApplicationTools::displayResult("Genetic Code", codeDesc); gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc)); } VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams()); VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(* allSites, bppdist.getParams()); delete allSites; ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences())); ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites())); SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams()); DiscreteDistribution* rDist = 0; if (model->getNumberOfStates() > model->getAlphabet()->getSize()) { //Markov-modulated Markov model! 
rDist = new ConstantRateDistribution(); } else { rDist = PhylogeneticsApplicationTools::getRateDistribution(bppdist.getParams()); } DistanceEstimation distEstimation(model, rDist, sites, 1, false); string method = ApplicationTools::getStringParameter("method", bppdist.getParams(), "nj"); ApplicationTools::displayResult("Tree reconstruction method", method); TreeTemplate<Node>* tree; AgglomerativeDistanceMethod* distMethod = 0; if(method == "wpgma") { PGMA* wpgma = new PGMA(true); distMethod = wpgma; } else if(method == "upgma") { PGMA* upgma = new PGMA(false); distMethod = upgma; } else if(method == "nj") { NeighborJoining* nj = new NeighborJoining(); nj->outputPositiveLengths(true); distMethod = nj; } else if(method == "bionj") { BioNJ* bionj = new BioNJ(); bionj->outputPositiveLengths(true); distMethod = bionj; } else throw Exception("Unknown tree reconstruction method."); string type = ApplicationTools::getStringParameter("optimization.method", bppdist.getParams(), "init"); ApplicationTools::displayResult("Model parameters estimation method", type); if (type == "init") type = OptimizationTools::DISTANCEMETHOD_INIT; else if (type == "pairwise") type = OptimizationTools::DISTANCEMETHOD_PAIRWISE; else if (type == "iterations") type = OptimizationTools::DISTANCEMETHOD_ITERATIONS; else throw Exception("Unknown parameter estimation procedure '" + type + "'."); unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", bppdist.getParams(), 2); string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false); OutputStream* messenger = (mhPath == "none") ? 0 : (mhPath == "std") ? ApplicationTools::message : new StlOutputStream(new ofstream(mhPath.c_str(), ios::out)); ApplicationTools::displayResult("Message handler", mhPath); string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false); OutputStream* profiler = (prPath == "none") ? 
0 : (prPath == "std") ? ApplicationTools::message : new StlOutputStream(new ofstream(prPath.c_str(), ios::out)); if(profiler) profiler->setPrecision(20); ApplicationTools::displayResult("Profiler", prPath); // Should I ignore some parameters? ParameterList allParameters = model->getParameters(); allParameters.addParameters(rDist->getParameters()); ParameterList parametersToIgnore; string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", bppdist.getParams(), "", "", true, false); bool ignoreBrLen = false; StringTokenizer st(paramListDesc, ","); while (st.hasMoreToken()) { try { string param = st.nextToken(); if (param == "BrLen") ignoreBrLen = true; else { if (allParameters.hasParameter(param)) { Parameter* p = &allParameters.getParameter(param); parametersToIgnore.addParameter(*p); } else ApplicationTools::displayWarning("Parameter '" + param + "' not found."); } } catch (ParameterNotFoundException& pnfe) { ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!"); } } unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", bppdist.getParams(), 1000000); ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax)); double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", bppdist.getParams(), .000001); ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance)); //Here it is: ofstream warn("warnings", ios::out); ApplicationTools::warning = new StlOutputStreamWrapper(&warn); tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose); warn.close(); delete ApplicationTools::warning; ApplicationTools::warning = ApplicationTools::message; string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false); if 
(matrixPath != "none") { ApplicationTools::displayResult("Output matrix file", matrixPath); string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false); string format = ""; bool extended = false; std::map<std::string, std::string> unparsedArguments_; KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_); if (unparsedArguments_.find("type") != unparsedArguments_.end()) { if (unparsedArguments_["type"] == "extended") { extended = true; } else if (unparsedArguments_["type"] == "classic") extended = false; else ApplicationTools::displayWarning("Argument '" + unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " + "Default used instead: not extended."); } else ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended."); ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended); odm->write(*distEstimation.getMatrix(), matrixPath, true); delete odm; } PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams()); //Output some parameters: if (type == OptimizationTools::DISTANCEMETHOD_ITERATIONS) { // Write parameters to screen: ParameterList parameters = model->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } parameters = rDist->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } // Write parameters to file: string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppdist.getParams(), false, false); if (parametersFile != "none") { ofstream out(parametersFile.c_str(), ios::out); parameters = model->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { out << parameters[i].getName() 
<< " = " << parameters[i].getValue() << endl; } parameters = rDist->getParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { out << parameters[i].getName() << " = " << parameters[i].getValue() << endl; } out.close(); } } //Bootstrap: unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppdist.getParams(), 0); if(nbBS > 0) { ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS)); bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppdist.getParams(), true); ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no")); if(approx) { type = OptimizationTools::DISTANCEMETHOD_INIT; parametersToIgnore = allParameters; ignoreBrLen = true; } bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppdist.getParams(), false, "", true, false); string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bppdist.getParams(), false, false); ofstream *out = NULL; if(bsTreesPath != "none") { ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath); out = new ofstream(bsTreesPath.c_str(), ios::out); } Newick newick; vector<Tree *> bsTrees(nbBS); ApplicationTools::displayTask("Bootstrapping", true); for(unsigned int i = 0; i < nbBS; i++) { ApplicationTools::displayGauge(i, nbBS-1, '='); VectorSiteContainer * sample = SiteContainerTools::bootstrapSites(*sites); if(approx) model->setFreqFromData(*sample); distEstimation.setData(sample); bsTrees[i] = OptimizationTools::buildDistanceTree( distEstimation, *distMethod, parametersToIgnore, ignoreBrLen, type, tolerance, nbEvalMax, NULL, NULL, (bootstrapVerbose ? 
1 : 0) ); if(out && i == 0) newick.write(*bsTrees[i], bsTreesPath, true); if(out && i > 0) newick.write(*bsTrees[i], bsTreesPath, false); delete sample; } if(out) out->close(); if(out) delete out; ApplicationTools::displayTaskDone(); ApplicationTools::displayTask("Compute bootstrap values"); TreeTools::computeBootstrapValues(*tree, bsTrees); ApplicationTools::displayTaskDone(); for(unsigned int i = 0; i < nbBS; i++) delete bsTrees[i]; //Write resulting tree: PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams()); } delete alphabet; delete sites; delete distMethod; delete tree; bppdist.done();} catch(exception & e) { cout << e.what() << endl; return 1; } return 0; }