Пример #1
0
size_t Alignment::get_number_of_free_parameters() {
    if (!likelihood) throw Exception("Likelihood model not initialised");
    ParameterList pl = likelihood->getBranchLengthsParameters();
    pl.addParameters(model->getIndependentParameters());
    if (rates->getName() == "Gamma") pl.addParameters(rates->getIndependentParameters());
    return pl.size();
}
Пример #2
0
void Alignment::optimise_topology(bool fix_model_params) {
    if (!likelihood) {
        cerr << "Likelihood calculator not set - call initialise_likelihood" << endl;
        throw Exception("Uninitialised likelihood error");
    }
    ParameterList pl = likelihood->getBranchLengthsParameters();
    if (!fix_model_params) {
        pl.addParameters(model->getIndependentParameters());
        if (rates->getName() == "Gamma") pl.addParameters(rates->getIndependentParameters());
    }
    likelihood = make_shared<NNIHomogeneousTreeLikelihood>(*OptimizationTools::optimizeTreeNNI2(likelihood.get(), pl, true, 0.001, 0.1, 1000000, 1, NULL, NULL, false, 10));
}
Пример #3
0
/**
 * Estimates a distance matrix with the supplied estimation method.
 *
 * @param estimationMethod   Estimator to configure and run.
 * @param parametersToIgnore Parameters removed from the additional set that is
 *                           built in pairwise mode.
 * @param param              DISTANCEMETHOD_PAIRWISE or DISTANCEMETHOD_INIT.
 * @param verbose            Verbosity forwarded to the estimator; task messages
 *                           are displayed when it is greater than 0.
 * @return The matrix obtained from the estimator; the pointer is released to
 *         the caller.
 * @throws Exception if param is neither of the two accepted values.
 */
DistanceMatrix* OptimizationTools::estimateDistanceMatrix(
  DistanceEstimation& estimationMethod,
  const ParameterList& parametersToIgnore,
  const std::string& param,
  unsigned int verbose) throw (Exception)
{
  const bool pairwise = (param == DISTANCEMETHOD_PAIRWISE);
  if (!pairwise && param != DISTANCEMETHOD_INIT)
  {
    throw Exception("OptimizationTools::estimateDistanceMatrix. Invalid option param=" + param + ".");
  }

  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);

  if (pairwise)
  {
    // Model and rate-distribution parameters, minus the ignored ones, become
    // the estimator's additional parameters.
    ParameterList additional = estimationMethod.getSubstitutionModel().getIndependentParameters();
    additional.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    additional.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(additional);
  }

  const bool showProgress = (verbose > 0);

  // Compute the matrix:
  if (showProgress)
  {
    ApplicationTools::displayTask("Estimating distance matrix", true);
  }
  estimationMethod.computeMatrix();

  // Held in a smart pointer until the final display call has completed.
  auto_ptr<DistanceMatrix> result(estimationMethod.getMatrix());
  if (showProgress)
  {
    ApplicationTools::displayTaskDone();
  }

  return result.release();
}
/**
 * Returns the substitution-model parameters followed by the rate-distribution
 * parameters.
 *
 * @return A new list combining both parameter sets.
 * @throws Exception if the object has not been initialized.
 */
ParameterList AbstractDiscreteRatesAcrossSitesTreeLikelihood::getNonDerivableParameters() const
{
  if (!_initialized)
  {
    throw Exception("AbstractDiscreteRatesAcrossSitesTreeLikelihood::getNonDerivableParameters(). Object is not initialized.");
  }

  ParameterList nonDerivable = getSubstitutionModelParameters();
  nonDerivable.addParameters(getRateDistributionParameters());
  return nonDerivable;
}
Пример #5
0
void Alignment::_print_params() {
    if (likelihood) {
        ParameterList pl = likelihood->getParameters();
        pl.printParameters(cout);
    }
    else if (rates && model) {
         ParameterList pl = rates->getIndependentParameters();
         pl.addParameters(model->getIndependentParameters());
         pl.printParameters(cout);
         cout << "----------" << endl;
    }
}
Пример #6
0
void Alignment::optimise_parameters(bool fix_branch_lengths) {
    if (!likelihood) {
        cerr << "Likelihood calculator not set - call initialise_likelihood" << endl;
        throw Exception("Uninitialised likelihood error");
    }
    ParameterList pl;
    if (fix_branch_lengths) {
        pl = likelihood->getSubstitutionModelParameters();
        pl.addParameters(likelihood->getRateDistributionParameters());
    }
    else {
        pl = likelihood->getParameters();
    }
    OptimizationTools::optimizeNumericalParameters2(likelihood.get(), pl, 0, 0.001, 1000000, NULL, NULL, false, false, 10);
}
Пример #7
0
/**
 * Builds a tree from estimated pairwise distances.
 *
 * A distance matrix is estimated and handed to the reconstruction method. When
 * param is DISTANCEMETHOD_ITERATIONS, model parameters are then re-estimated by
 * likelihood on the tree just built and the whole procedure is repeated until
 * the topology no longer differs from the one of the previous iteration
 * (Robinson-Foulds distance of 0). For any other value of param a single pass
 * is made.
 *
 * @param estimationMethod     Distance estimator; it is reconfigured and run here.
 * @param reconstructionMethod Agglomerative method turning the matrix into a tree.
 * @param parametersToIgnore   Parameters excluded from estimation.
 * @param optimizeBrLen        In iterations mode, whether branch lengths are
 *                             optimised together with the other parameters.
 * @param param                Estimation mode (pairwise, iterations, or other).
 * @param tolerance            Tolerance forwarded to the numerical optimisation.
 * @param tlEvalMax            Maximum number of evaluations forwarded to the optimisation.
 * @param profiler             Profiler stream forwarded to the optimisation (may be NULL).
 * @param messenger            Message stream forwarded to the optimisation (may be NULL).
 * @param verbose              Verbosity; progress is displayed when greater than 0.
 * @return The tree built in the last pass; the caller owns it.
 */
TreeTemplate<Node>* OptimizationTools::buildDistanceTree(
  DistanceEstimation& estimationMethod,
  AgglomerativeDistanceMethod& reconstructionMethod,
  const ParameterList& parametersToIgnore,
  bool optimizeBrLen,
  const std::string& param,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* profiler,
  OutputStream* messenger,
  unsigned int verbose) throw (Exception)
{
  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);
  if (param == DISTANCEMETHOD_PAIRWISE)
  {
    ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters();
    tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    tmp.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(tmp);
  }
  TreeTemplate<Node>* tree = NULL;
  TreeTemplate<Node>* previousTree = NULL;
  bool test = true;
  while (test)
  {
    // Compute the matrix:
    if (verbose > 0)
      ApplicationTools::displayTask("Estimating distance matrix", true);
    estimationMethod.computeMatrix();
    // The matrix returned by getMatrix() has to be released by this function;
    // the smart pointer guarantees that on every way out of the iteration,
    // including the two-taxon shortcut below.
    auto_ptr<DistanceMatrix> matrix(estimationMethod.getMatrix());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    // Compute the tree:
    if (matrix->size() == 2) {
      // Special case, there is only one possible tree: each leaf is placed at
      // half of the distance separating the two taxa, i.e. the off-diagonal
      // entry (0,1) of the matrix.
      const double halfDistance = (*matrix)(0, 1) / 2.;
      Node* n1 = new Node(0);
      Node* n2 = new Node(1, matrix->getName(0));
      n2->setDistanceToFather(halfDistance);
      Node* n3 = new Node(2, matrix->getName(1));
      n3->setDistanceToFather(halfDistance);
      n1->addSon(n2);
      n1->addSon(n3);
      tree = new TreeTemplate<Node>(n1);
      break;
    }
    if (verbose > 0)
      ApplicationTools::displayTask("Building tree");
    reconstructionMethod.setDistanceMatrix(*matrix);
    reconstructionMethod.computeTree();
    matrix.reset(); // Not needed anymore once the tree has been computed.
    previousTree = tree;
    tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();
    if (previousTree)
    {
      // Convergence is evaluated whatever the verbosity: the loop has no other
      // exit in iterations mode, and the previous tree must be freed each time.
      int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false);
      if (verbose > 0)
        ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf));
      // Keep iterating only while the topology is still changing.
      test = (rf != 0);
      delete previousTree;
      previousTree = NULL;
    }
    if (param != DISTANCEMETHOD_ITERATIONS)
      break;  // Ends here.

    // Now, re-estimate parameters on the current tree.
    // NOTE(review): the optimisation runs on clones of the model and rate
    // distribution; nothing visible here copies the optimised values back
    // into estimationMethod - confirm this is handled elsewhere.
    auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone());
    auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone());
    DRHomogeneousTreeLikelihood tl(*tree,
        *estimationMethod.getData(),
        model.get(),
        rdist.get(),
        true, verbose > 1);
    tl.initialize();
    ParameterList parameters = tl.getParameters();
    if (!optimizeBrLen)
    {
      vector<string> vs = tl.getBranchLengthsParameters().getParameterNames();
      parameters.deleteParameters(vs);
    }
    parameters.deleteParameters(parametersToIgnore.getParameterNames());
    optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0);
    if (verbose > 0)
    {
      ParameterList tmp = tl.getSubstitutionModelParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
      tmp = tl.getRateDistributionParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
    }
  }
  return tree;
}
Пример #8
0
/**
 * Optimises a set of likelihood parameters with a derivative-based optimizer
 * working on numerical derivatives.
 *
 * @param tl                Likelihood function to optimise.
 * @param parameters        Parameters to optimise.
 * @param listener          Optional extra optimization listener (may be NULL).
 * @param tolerance         Tolerance set on the optimizer's stop condition.
 * @param tlEvalMax         Maximum number of function evaluations.
 * @param messageHandler    Message stream given to the optimizer.
 * @param profiler          Profiler stream given to the optimizer.
 * @param reparametrization When true, the function is wrapped to remove
 *                          parameter constraints.
 * @param useClock          When true, the function is wrapped with a global
 *                          clock constraint and height parameters replace
 *                          branch-length parameters.
 * @param verbose           Verbosity given to the optimizer; messages are
 *                          displayed here when greater than 0.
 * @param optMethodDeriv    OPTIMIZATION_GRADIENT, OPTIMIZATION_NEWTON or
 *                          OPTIMIZATION_BFGS.
 * @return Number of evaluations reported by the optimizer.
 * @throws Exception if optMethodDeriv is none of the supported methods.
 */
unsigned int OptimizationTools::optimizeNumericalParameters2(
  DiscreteRatesAcrossSitesTreeLikelihood* tl,
  const ParameterList& parameters,
  OptimizationListener* listener,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* messageHandler,
  OutputStream* profiler,
  bool reparametrization,
  bool useClock,
  unsigned int verbose,
  const std::string& optMethodDeriv)
throw (Exception)
{
  // Function actually optimised: the likelihood itself, possibly wrapped below.
  DerivableSecondOrder* target = tl;
  ParameterList toOptimize = parameters;

  // Optional molecular clock constraint on branch lengths.
  auto_ptr<GlobalClockTreeLikelihoodFunctionWrapper> clockWrapper;
  if (useClock)
  {
    clockWrapper.reset(new GlobalClockTreeLikelihoodFunctionWrapper(tl));
    target = clockWrapper.get();
    if (verbose > 0)
    {
      ApplicationTools::displayResult("Log-likelihood after adding clock", -tl->getLogLikelihood());
    }

    // The 'old' branch-length parameters no longer exist under the clock:
    // keep the parameters still known to the wrapper and add the heights.
    toOptimize = clockWrapper->getParameters().getCommonParametersWith(parameters);
    toOptimize.addParameters(clockWrapper->getHeightParameters());
  }

  // Optional reparametrization removing constraints.
  auto_ptr<DerivableSecondOrder> unconstrained;
  if (reparametrization)
  {
    unconstrained.reset(new ReparametrizationDerivableSecondOrderWrapper(target, toOptimize));
    target = unconstrained.get();

    // Fetch the unconstrained versions of the selected parameters.
    toOptimize = target->getParameters().subList(toOptimize.getParameterNames());
  }

  // Numerical-derivative wrapper and optimizer, both chosen from the method name.
  auto_ptr<AbstractNumericalDerivative> numDerivative;
  auto_ptr<Optimizer> optimizer;
  if (optMethodDeriv == OPTIMIZATION_GRADIENT)
  {
    numDerivative.reset(new TwoPointsNumericalDerivative(target));
    numDerivative->setInterval(0.0000001);
    optimizer.reset(new ConjugateGradientMultiDimensions(numDerivative.get()));
  }
  else if (optMethodDeriv == OPTIMIZATION_NEWTON)
  {
    numDerivative.reset(new ThreePointsNumericalDerivative(target));
    numDerivative->setInterval(0.0001);
    optimizer.reset(new PseudoNewtonOptimizer(numDerivative.get()));
  }
  else if (optMethodDeriv == OPTIMIZATION_BFGS)
  {
    numDerivative.reset(new TwoPointsNumericalDerivative(target));
    numDerivative->setInterval(0.0001);
    optimizer.reset(new BfgsMultiDimensions(numDerivative.get()));
  }
  else
  {
    throw Exception("OptimizationTools::optimizeNumericalParameters2. Unknown optimization method: " + optMethodDeriv);
  }

  // Parameters differentiated numerically: the non-derivable ones, plus the
  // heights when a clock is used.
  ParameterList numericalOnes = tl->getNonDerivableParameters();
  if (useClock)
  {
    numericalOnes.addParameters(clockWrapper->getHeightParameters());
  }
  numDerivative->setParametersToDerivate(numericalOnes.getParameterNames());

  // Optimizer settings.
  optimizer->setVerbose(verbose);
  optimizer->setProfiler(profiler);
  optimizer->setMessageHandler(messageHandler);
  optimizer->setMaximumNumberOfEvaluations(tlEvalMax);
  optimizer->getStopCondition()->setTolerance(tolerance);
  optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);

  // Listeners: the NaN listener first, then the caller's one if any.
  // NOTE(review): this listener is allocated here and never deleted in this
  // function - confirm whether the optimizer takes ownership of it.
  NaNListener* nanGuard = new NaNListener(optimizer.get(), tl);
  optimizer->addOptimizationListener(nanGuard);
  if (listener)
  {
    optimizer->addOptimizationListener(listener);
  }

  // Run the optimization.
  optimizer->init(toOptimize);
  optimizer->optimize();

  if (verbose > 0)
  {
    ApplicationTools::displayMessage("\n");
  }

  return optimizer->getNumberOfEvaluations();
}
Пример #9
0
/**
 * Entry point of the Bio++ distance-methods program.
 *
 * Steps, in order: read the alphabet, sites, substitution model and rate
 * distribution from the program options; pick the agglomerative method
 * (wpgma, upgma, nj or bionj) and the parameter estimation procedure (init,
 * pairwise or iterations); build the distance tree; optionally write the
 * distance matrix; write the tree; in iterations mode, display and optionally
 * save the parameter estimates; optionally run a bootstrap analysis and write
 * the tree again with bootstrap values.
 *
 * @param args Number of command-line arguments; help is displayed when it is 1.
 * @param argv Command-line arguments, forwarded to BppApplication.
 * @return 0 on success or after displaying help, 1 if an exception was caught.
 */
int main(int args, char ** argv)
{
  cout << "******************************************************************" << endl;
  cout << "*              Bio++ Distance Methods, version 2.2.0             *" << endl;
  cout << "* Author: J. Dutheil                        Created     05/05/07 *" << endl;
  cout << "*                                           Last Modif. 04/02/15 *" << endl;
  cout << "******************************************************************" << endl;
  cout << endl;

  if (args == 1)
  {
    help();
    return 0;
  }

  try
  {
    BppApplication bppdist(args, argv, "BppDist");
    bppdist.startTimer();

    Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false);
    auto_ptr<GeneticCode> gCode;
    CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
    if (codonAlphabet)
    {
      string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true);
      ApplicationTools::displayResult("Genetic Code", codeDesc);

      gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
    }

    VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams());

    VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(*allSites, bppdist.getParams());
    delete allSites;

    ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
    ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));

    SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams());

    DiscreteDistribution* rDist = 0;
    if (model->getNumberOfStates() > model->getAlphabet()->getSize())
    {
      // Markov-modulated Markov model!
      rDist = new ConstantRateDistribution();
    }
    else
    {
      rDist = PhylogeneticsApplicationTools::getRateDistribution(bppdist.getParams());
    }

    DistanceEstimation distEstimation(model, rDist, sites, 1, false);

    // Tree reconstruction method:
    string method = ApplicationTools::getStringParameter("method", bppdist.getParams(), "nj");
    ApplicationTools::displayResult("Tree reconstruction method", method);
    TreeTemplate<Node>* tree = 0;
    AgglomerativeDistanceMethod* distMethod = 0;
    if (method == "wpgma")
    {
      PGMA* wpgma = new PGMA(true);
      distMethod = wpgma;
    }
    else if (method == "upgma")
    {
      PGMA* upgma = new PGMA(false);
      distMethod = upgma;
    }
    else if (method == "nj")
    {
      NeighborJoining* nj = new NeighborJoining();
      nj->outputPositiveLengths(true);
      distMethod = nj;
    }
    else if (method == "bionj")
    {
      BioNJ* bionj = new BioNJ();
      bionj->outputPositiveLengths(true);
      distMethod = bionj;
    }
    else throw Exception("Unknown tree reconstruction method.");

    // Parameter estimation procedure:
    string type = ApplicationTools::getStringParameter("optimization.method", bppdist.getParams(), "init");
    ApplicationTools::displayResult("Model parameters estimation method", type);
    if (type == "init") type = OptimizationTools::DISTANCEMETHOD_INIT;
    else if (type == "pairwise") type = OptimizationTools::DISTANCEMETHOD_PAIRWISE;
    else if (type == "iterations") type = OptimizationTools::DISTANCEMETHOD_ITERATIONS;
    else throw Exception("Unknown parameter estimation procedure '" + type + "'.");

    unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", bppdist.getParams(), 2);

    string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false);
    OutputStream* messenger =
      (mhPath == "none") ? 0 :
        (mhPath == "std") ? ApplicationTools::message :
          new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
    ApplicationTools::displayResult("Message handler", mhPath);

    string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false);
    OutputStream* profiler =
      (prPath == "none") ? 0 :
        (prPath == "std") ? ApplicationTools::message :
          new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
    if (profiler) profiler->setPrecision(20);
    ApplicationTools::displayResult("Profiler", prPath);

    // Should I ignore some parameters?
    ParameterList allParameters = model->getParameters();
    allParameters.addParameters(rDist->getParameters());
    ParameterList parametersToIgnore;
    string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", bppdist.getParams(), "", "", true, false);
    bool ignoreBrLen = false;
    StringTokenizer st(paramListDesc, ",");
    while (st.hasMoreToken())
    {
      try
      {
        string param = st.nextToken();
        if (param == "BrLen")
          ignoreBrLen = true;
        else
        {
          if (allParameters.hasParameter(param))
          {
            Parameter* p = &allParameters.getParameter(param);
            parametersToIgnore.addParameter(*p);
          }
          else ApplicationTools::displayWarning("Parameter '" + param + "' not found.");
        }
      }
      catch (ParameterNotFoundException& pnfe)
      {
        ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
      }
    }

    unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", bppdist.getParams(), 1000000);
    ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));

    double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", bppdist.getParams(), .000001);
    ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));

    // Here it is: warnings are redirected to the file "warnings" while the tree is built.
    ofstream warn("warnings", ios::out);
    ApplicationTools::warning = new StlOutputStreamWrapper(&warn);
    tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
    warn.close();
    delete ApplicationTools::warning;
    ApplicationTools::warning = ApplicationTools::message;

    string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false);
    if (matrixPath != "none")
    {
      ApplicationTools::displayResult("Output matrix file", matrixPath);
      string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false);
      string format = "";
      bool extended = false;
      std::map<std::string, std::string> unparsedArguments_;
      KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_);
      if (unparsedArguments_.find("type") != unparsedArguments_.end())
      {
        if (unparsedArguments_["type"] == "extended")
        {
          extended = true;
        }
        else if (unparsedArguments_["type"] == "classic")
          extended = false;
        else
          ApplicationTools::displayWarning("Argument '" +
                                           unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
                                           "Default used instead: not extended.");
      }
      else
        ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended.");

      // Both the writer and the matrix obtained from the estimator have to be
      // released here; smart pointers do it even if writing fails.
      auto_ptr<ODistanceMatrix> odm(IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended));
      auto_ptr<DistanceMatrix> matrix(distEstimation.getMatrix());
      odm->write(*matrix, matrixPath, true);
    }
    PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());

    // Output some parameters:
    if (type == OptimizationTools::DISTANCEMETHOD_ITERATIONS)
    {
      // Write parameters to screen:
      ParameterList parameters = model->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
      }
      parameters = rDist->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
      }
      // Write parameters to file:
      string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppdist.getParams(), false, false);
      if (parametersFile != "none")
      {
        ofstream out(parametersFile.c_str(), ios::out);
        parameters = model->getParameters();
        for (unsigned int i = 0; i < parameters.size(); i++)
        {
          out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
        }
        parameters = rDist->getParameters();
        for (unsigned int i = 0; i < parameters.size(); i++)
        {
          out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
        }
        out.close();
      }
    }

    // Bootstrap:
    unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppdist.getParams(), 0);
    if (nbBS > 0)
    {
      ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
      bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppdist.getParams(), true);
      ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no"));
      if (approx)
      {
        // Approximate bootstrap: no parameter is re-estimated on the replicates.
        type = OptimizationTools::DISTANCEMETHOD_INIT;
        parametersToIgnore = allParameters;
        ignoreBrLen = true;
      }
      bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppdist.getParams(), false, "", true, false);

      string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bppdist.getParams(), false, false);
      // The Newick writer opens the file by path itself, so only a flag is needed here.
      const bool storeBsTrees = (bsTreesPath != "none");
      if (storeBsTrees)
      {
        ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath);
      }
      Newick newick;

      vector<Tree *> bsTrees(nbBS);
      ApplicationTools::displayTask("Bootstrapping", true);
      for (unsigned int i = 0; i < nbBS; i++)
      {
        ApplicationTools::displayGauge(i, nbBS-1, '=');
        VectorSiteContainer * sample = SiteContainerTools::bootstrapSites(*sites);
        if (approx) model->setFreqFromData(*sample);
        distEstimation.setData(sample);
        // The fourth argument means "optimise branch lengths", hence the
        // negation of ignoreBrLen, exactly as for the main tree above.
        bsTrees[i] = OptimizationTools::buildDistanceTree(
            distEstimation,
            *distMethod,
            parametersToIgnore,
            !ignoreBrLen,
            type,
            tolerance,
            nbEvalMax,
            NULL,
            NULL,
            (bootstrapVerbose ? 1 : 0)
          );
        // The first tree overwrites the file, the following ones are appended.
        if (storeBsTrees) newick.write(*bsTrees[i], bsTreesPath, i == 0);
        delete sample;
      }
      ApplicationTools::displayTaskDone();
      ApplicationTools::displayTask("Compute bootstrap values");
      TreeTools::computeBootstrapValues(*tree, bsTrees);
      ApplicationTools::displayTaskDone();
      for (unsigned int i = 0; i < nbBS; i++) delete bsTrees[i];

      // Write resulting tree:
      PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());
    }

    // Release the message/profiler streams created above, but never the
    // shared ApplicationTools::message stream they may alias.
    if (profiler != ApplicationTools::message) delete profiler;
    if (messenger != ApplicationTools::message) delete messenger;

    delete alphabet;
    delete sites;
    delete distMethod;
    delete tree;

    bppdist.done();
  }
  catch (const exception & e)
  {
    cout << e.what() << endl;
    return 1;
  }

  return 0;
}