Esempio n. 1
0
pair<string,scalar_type> tree_LL_nucl(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  //const Alphabet* alphabet = new ProteicAlphabet();
  const Alphabet* alphabet = new RNA();
	OrderedSequenceContainer *alignment;
	VectorSiteContainer* sites;
	Fasta Reader;
	//NexusIOSequence Reader;
	//Phylip * Reader=new Phylip(true,true,100,true,"\r");
	alignment = Reader.read(aln_filename, alphabet);
	sites = new VectorSiteContainer(*alignment);
	SiteContainerTools::removeGapOnlySites(*sites);	
	SiteContainerTools::changeGapsToUnknownCharacters(*sites);	

	TreeTemplate<Node>* ttree1=TreeTemplateTools::parenthesisToTree(tree,false,"ID");
	DiscreteRatesAcrossSitesTreeLikelihood* tl1;
	SubstitutionModel*    model    = 0;
	DiscreteDistribution* rDist    = 0;	
	model = new GTR(&AlphabetTools::RNA_ALPHABET);
	model->setFreqFromData(*sites);
	rDist = new GammaDiscreteDistribution(8, 1, 1);
	tl1 = new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
	tl1->initialize();
	if (optimize_bls)
	  {
	    //Newton..
	    ParameterList * parameters= new ParameterList();
	    parameters->addParameters( tl1->getBranchLengthsParameters());
	    parameters->addParameters( tl1->getRateDistributionParameters());
	    OptimizationTools::optimizeNumericalParameters(
									     dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>  (tl1),
									     //tl1->getParameters(),
									     *parameters,
									     0,
									     1,
									     tolerance,
									     1000,
									     0,
									     0,
									     false,
									     0,
									     OptimizationTools::OPTIMIZATION_NEWTON,
									     //OptimizationTools::OPTIMIZATION_BRENT);
									     OptimizationTools::OPTIMIZATION_BFGS);
	
	    delete parameters;
	      }
	scalar_type LL=- tl1->getValue(); //Here's your log likelihood value !
	//tl1->getParameters().printParameters(cout);
	//cout << TreeTemplateTools::treeToParenthesis( tl1->getTree() ) <<endl;
	pair<string,scalar_type> return_pair;
	return_pair.first= TreeTemplateTools::treeToParenthesis( tl1->getTree() ) ;
	return_pair.second=LL;
	delete sites;
	delete alphabet;
	delete model;
	delete rDist;
	delete tl1;
	return 	return_pair;
}
Esempio n. 2
0
int main(int args, char** argv)
{
  cout << "******************************************************************" << endl;
  cout << "*     Bio++ Computation of site likelihoods inside mixed models  *" << endl;
  cout << "*                        Version 2.2.0.                          *" << endl;
  cout << "* Author: L. Guéguen                       Last Modif.: 25/09/14 *" << endl;
  cout << "******************************************************************" << endl;
  cout << endl;

  if (args == 1)
  {
    help();
    return 0;
  }

  try
  {
    BppApplication bppmixedlikelihoods(args, argv, "BppMixedLikelihoods");
    bppmixedlikelihoods.startTimer();

    Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppmixedlikelihoods.getParams(), "", false);
    auto_ptr<GeneticCode> gCode;
    CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
    if (codonAlphabet) {
      string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppmixedlikelihoods.getParams(), "Standard", "", true, true);
      ApplicationTools::displayResult("Genetic Code", codeDesc);
      
      gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
    }

    // get the data

    VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppmixedlikelihoods.getParams());

    VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(*allSites, bppmixedlikelihoods.getParams(), "", true, false);
    delete allSites;

    ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
    ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));

    // Get the tree
    Tree* tree = PhylogeneticsApplicationTools::getTree(bppmixedlikelihoods.getParams());
    ApplicationTools::displayResult("Number of leaves", TextTools::toString(tree->getNumberOfLeaves()));


    AbstractDiscreteRatesAcrossSitesTreeLikelihood* tl;
    string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppmixedlikelihoods.getParams(), "no", "", true, false);
    ApplicationTools::displayResult("Heterogeneous model", nhOpt);

    MixedSubstitutionModel* model       = 0;
    MixedSubstitutionModelSet* modelSet = 0;
    DiscreteDistribution* rDist         = 0;

    if (nhOpt == "no")
    {
      model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
      if (model == 0)
      {
        cout << "Model is not a Mixed model" << endl;
        exit(0);
      }

      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
      if (model->getNumberOfStates() > model->getAlphabet()->getSize())
      {
        // Markov-modulated Markov model!
        rDist = new ConstantRateDistribution();
      }
      else
      {
        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
      }
      tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true);
    }
    else if (nhOpt == "one_per_branch")
    {
      model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
      if (model == 0)
      {
        cout << "Model is not a Mixed model" << endl;
        exit(0);
      }

      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
      if (model->getNumberOfStates() > model->getAlphabet()->getSize())
      {
        // Markov-modulated Markov model!
        rDist = new ConstantRateDistribution();
      }
      else
      {
        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
      }
      vector<double> rateFreqs;
      if (model->getNumberOfStates() != alphabet->getSize())
      {
        // Markov-Modulated Markov Model...
        unsigned int n = (unsigned int)(model->getNumberOfStates() / alphabet->getSize());
        rateFreqs = vector<double>(n, 1. / (double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case,
        // we should assume a rate distribution for the root also!!!
      }
      
      std::map<std::string, std::string> aliasFreqNames;

      FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams(), aliasFreqNames, rateFreqs);
      vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppmixedlikelihoods.getParams(), ',', "");
      modelSet = dynamic_cast<MixedSubstitutionModelSet*>(SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, aliasFreqNames, globalParameters));
      model = 0;
      tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
    }
    else if (nhOpt == "general")
    {
      modelSet = dynamic_cast<MixedSubstitutionModelSet*>(PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams()));
      if (modelSet == 0)
      {
        cout << "Missing a Mixed model" << endl;
        exit(0);
      }

      SiteContainerTools::changeGapsToUnknownCharacters(*sites);
      if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize())
      {
        // Markov-modulated Markov model!
        rDist = new ConstantDistribution(1.);
      }
      else
      {
        rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams());
      }
      tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true);
    }
    else
      throw Exception("Unknown option for nonhomogeneous: " + nhOpt);

    tl->initialize();

    double logL = tl->getValue();
    if (isinf(logL))
    {
      // This may be due to null branch lengths, leading to null likelihood!
      ApplicationTools::displayWarning("!!! Warning!!! Likelihood is zero.");
      ApplicationTools::displayWarning("!!! This may be due to branch length == 0.");
      ApplicationTools::displayWarning("!!! All null branch lengths will be set to 0.000001.");
      ParameterList pl = tl->getBranchLengthsParameters();
      for (unsigned int i = 0; i < pl.size(); i++)
      {
        if (pl[i].getValue() < 0.000001)
          pl[i].setValue(0.000001);
      }
      tl->matchParametersValues(pl);
      logL = tl->getValue();
    }
    if (isinf(logL))
    {
      ApplicationTools::displayError("!!! Unexpected likelihood == 0.");
      ApplicationTools::displayError("!!! Looking at each site:");
      for (unsigned int i = 0; i < sites->getNumberOfSites(); i++)
      {
        (*ApplicationTools::error << "Site " << sites->getSite(i).getPosition() << "\tlog likelihood = " << tl->getLogLikelihoodForASite(i)).endLine();
      }
      ApplicationTools::displayError("!!! 0 values (inf in log) may be due to computer overflow, particularily if datasets are big (>~500 sequences).");
      exit(-1);
    }


    // Write parameters to screen:
    ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
    ParameterList parameters = tl->getSubstitutionModelParameters();
    for (unsigned int i = 0; i < parameters.size(); i++)
    {
      ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
    }
    parameters = tl->getRateDistributionParameters();
    for (unsigned int i = 0; i < parameters.size(); i++)
    {
      ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
    }


    // /////////////////////////////////////////////
    // Getting likelihoods per submodel

    string outputFile;
    outputFile = ApplicationTools::getAFilePath("output.likelihoods.file", bppmixedlikelihoods.getParams(), true, false);
    ApplicationTools::displayResult("Output file for likelihoods", outputFile);
    ofstream out(outputFile.c_str(), ios::out);

    size_t nSites = sites->getNumberOfSites();

    size_t nummodel = ApplicationTools::getParameter<size_t>("likelihoods.model_number", bppmixedlikelihoods.getParams(), 1, "", true, true);

    string parname = ApplicationTools::getStringParameter("likelihoods.parameter_name", bppmixedlikelihoods.getParams(), "", "", true, false);

    if (modelSet && ((nummodel <= 0) || (nummodel > modelSet->getNumberOfModels())))
    {
      ApplicationTools::displayError("Bad number of model " + TextTools::toString(nummodel) + ".");
      exit(-1);
    }

    MixedSubstitutionModel* p0 = dynamic_cast<MixedSubstitutionModel*>(model ? model : modelSet->getModel(nummodel - 1));

    if (!p0)
    {
      ApplicationTools::displayError("Model " + TextTools::toString(nummodel) + " is not a Mixed Model.");
      exit(-1);
    }

    const AbstractBiblioMixedSubstitutionModel* ptmp = dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(p0);
    if (ptmp) {
      p0 = ptmp->getMixedModel().clone();

      if (nhOpt == "no")
        model = p0;
      else {
        modelSet->replaceModel(nummodel-1, p0);
        modelSet->isFullySetUpFor(*tree);
      }
    }
    
    //////////////////////////////////////////////////
    // Case of a MixtureOfSubstitutionModels

    MixtureOfSubstitutionModels* pMSM = dynamic_cast<MixtureOfSubstitutionModels*>(p0);

    if (pMSM)
    {
      vector<string> colNames;
      colNames.push_back("Sites");

      size_t nummod = pMSM->getNumberOfModels();
      for (unsigned int i = 0; i < nummod; i++)
      {
        colNames.push_back(pMSM->getNModel(i)->getName());
      }

      DataTable* rates = new DataTable(nSites, colNames.size());
      rates->setColumnNames(colNames);

      for (unsigned int i = 0; i < nSites; i++)
      {
        const Site* currentSite = &sites->getSite(i);
        int currentSitePosition = currentSite->getPosition();
        (*rates)(i, "Sites") = string("[" + TextTools::toString(currentSitePosition) + "]");
      }

      Vdouble vprob = pMSM->getProbabilities();
      for (unsigned int i = 0; i < nummod; i++)
      {
        string modname = pMSM->getNModel(i)->getName();

        for (unsigned int j = 0; j < nummod; j++)
        {
          pMSM->setNProbability(j, (j == i) ? 1 : 0);
        }

        if (tl)
          delete tl;

        if (nhOpt == "no")
          tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true);
        else
          tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true);

        tl->initialize();
        logL = tl->getValue();
        Vdouble Vd = tl->getLogLikelihoodForEachSite();
        for (unsigned int j = 0; j < nSites; j++)
        {
          (*rates)(j, modname) = TextTools::toString(Vd[j]);
        }

        ApplicationTools::displayMessage("\n");
        ApplicationTools::displayMessage("Model " + modname + ":");
        ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
        ApplicationTools::displayResult("Probability", TextTools::toString(vprob[i], 15));
      }

      DataTable::write(*rates, out, "\t");
    }

    //////////////////////////////////////////////////
    // Case of a MixtureOfASubstitutionModel

    else
    {
      MixtureOfASubstitutionModel* pMSM2 = dynamic_cast<MixtureOfASubstitutionModel*>(p0);
      if (pMSM2 != NULL)
      {
        size_t nummod = pMSM2->getNumberOfModels();
        if (parname == "")
        {
          ParameterList pl=pMSM2->getParameters();

          for (size_t i2 = 0; i2 < pl.size(); i2++)
          {
            string pl2n = pl[i2].getName();

            if (dynamic_cast<const ConstantDistribution*>(pMSM2->getDistribution(pl2n))==NULL)
            {
              parname=pl2n;

              while (parname.size()>0 && pMSM2->getDistribution(parname)==NULL)
                parname=pl2n.substr(0,pl2n.rfind("_"));

              if (parname.size()>0){
                ApplicationTools::displayResult("likelihoods.parameter_name", parname);
                break;
              }
            }
          }
        }

        if (parname == "")
        {
          ApplicationTools::displayError("Argument likelihoods.parameter_name is required.");
          exit(-1);
        }

        vector< Vint > vvnmod;
        size_t i2 = 0;
        while (i2 < nummod)
        {
          string par2 = parname + "_" + TextTools::toString(i2 + 1);
          Vint vnmod = pMSM2->getSubmodelNumbers(par2);
          if (vnmod.size() == 0)
            break;
          vvnmod.push_back(vnmod);
          i2++;
        }

        size_t nbcl = vvnmod.size();
        if (nbcl==0)
          throw Exception("Parameter " + parname + " is not mixed.");
        
        Vdouble vprob = pMSM2->getProbabilities();

        vector<vector<double> > vvprob;
        vector<double> vsprob;
        
        for (size_t i = 0; i < nbcl; i++)
        {
          vector<double> vprob2;
          for (size_t j = 0; j < vvnmod[i].size(); j++)
          {
            vprob2.push_back(vprob[static_cast<size_t>(vvnmod[i][j])]);
          }

          vvprob.push_back(vprob2);
          vsprob.push_back(VectorTools::sum(vvprob[i]));
        }

        vector<string> colNames;
        colNames.push_back("Sites");

        Vdouble dval;
        for (unsigned int i = 0; i < nbcl; i++)
        {
          SubstitutionModel* pSM = pMSM2->getNModel(static_cast<size_t>(vvnmod[i][0]));
          double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname));
          dval.push_back(valPar);
          colNames.push_back("Ll_" + parname + "=" + TextTools::toString(valPar));
        }
        for (unsigned int i = 0; i < nbcl; i++)
          colNames.push_back("Pr_" + parname + "=" + TextTools::toString(dval[i]));

        colNames.push_back("mean");

        DataTable* rates = new DataTable(nSites, colNames.size());
        rates->setColumnNames(colNames);

        for (unsigned int i = 0; i < nSites; i++)
        {
          const Site* currentSite = &sites->getSite(i);
          int currentSitePosition = currentSite->getPosition();
          (*rates)(i,"Sites")=TextTools::toString(currentSitePosition);
        }

        VVdouble vvd;

          
        vector<double> vRates = pMSM2->getVRates();

        for (size_t i = 0; i < nbcl; ++i)
        {
          string par2 = parname + "_" + TextTools::toString(i + 1);
          
          for (unsigned int j = 0; j < nummod; ++j)
            pMSM2->setNProbability(j, 0);

          for (size_t j = 0; j < vvprob[i].size(); ++j)
            pMSM2->setNProbability(static_cast<size_t>(vvnmod[i][j]), vvprob[i][j] / vsprob[i]);

          if (tl)
            delete tl;

          if (nhOpt == "no")
            tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true);
          else
            tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true);

          tl->initialize();
          logL = tl->getValue();
          Vdouble vd = tl->getLogLikelihoodForEachSite();

          for (unsigned int j = 0; j < nSites; j++)
            (*rates)(j, i + 1) = TextTools::toString(vd[j]);

          vvd.push_back(vd);

          ApplicationTools::displayMessage("\n");
          ApplicationTools::displayMessage("Parameter " + par2 + "=" + TextTools::toString(dval[i]) + " with rate=" + TextTools::toString(vRates[i]));

          ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15));
          ApplicationTools::displayResult("Probability", TextTools::toString(vsprob[i], 15));
        }

        for (unsigned int j = 0; j < nSites; j++)
        {
          Vdouble vd;
          for (unsigned int i = 0; i < nbcl; i++)
            vd.push_back(std::log(vsprob[i])+vvd[i][j]);
          
          VectorTools::logNorm(vd);
          for (unsigned int i = 0; i < nbcl; i++)
            (*rates)(j,nbcl + i + 1) = TextTools::toString(std::exp(vd[i]));
          (*rates)(j, 2 * nbcl + 1) = TextTools::toString(VectorTools::sumExp(vd, dval));
        }

        DataTable::write(*rates, out, "\t");
      }
    }

    delete alphabet;
    delete sites;
    if (model)
      delete model;
    if (modelSet)
      delete modelSet;
    delete rDist;
    delete tl;
    delete tree;
    ApplicationTools::displayMessage("\n");
    bppmixedlikelihoods.done();
  }

  catch (exception& e)
  {
    cout << e.what() << endl;
    return 1;
  }

  return 0;
}
Esempio n. 3
0
scalar_type tree_LL(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{

	const Alphabet* alphabet = new ProteicAlphabet();
	OrderedSequenceContainer *alignment;
	VectorSiteContainer* sites;
	Fasta Reader;
	//Phylip * Reader=new Phylip(true,true,100,true,"\r");
	alignment = Reader.read(aln_filename, alphabet);
	sites = new VectorSiteContainer(*alignment);
	SiteContainerTools::changeGapsToUnknownCharacters(*sites);
	
	TreeTemplate<Node>* ttree1=TreeTemplateTools::parenthesisToTree(tree,false,"ID");

	//Newick newick1;
	//ttree1 = newick1.read(tree);

	DiscreteRatesAcrossSitesTreeLikelihood* tl1;
	SubstitutionModel*    model    = 0;
	DiscreteDistribution* rDist    = 0;	

	model = new LG08(&AlphabetTools::PROTEIN_ALPHABET, new FullProteinFrequenciesSet(&AlphabetTools::PROTEIN_ALPHABET), true);
	model->setFreqFromData(*sites);

	rDist = new GammaDiscreteDistribution(4, 1, 1);

	tl1 = new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
	tl1->initialize();
		/*

	if (optimize_bls)
	  {
	    Optimizer* optimizer = new PseudoNewtonOptimizer(tl1);
	    //	  Optimizer* optimizer = new PseudoNewtonOptimizer(tl1);

	    ParameterList * parameters= new ParameterList();
	    parameters->addParameters( tl1->getBranchLengthsParameters());
	    parameters->addParameters( tl1->getRateDistributionParameters());
	    //Newton..
	    optimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
	    optimizer->setProfiler(0);
	    optimizer->setMessageHandler(0);
	    optimizer->setVerbose(0);
	    optimizer->getStopCondition()->setTolerance(0.01);
	    optimizer->init(*parameters);
	    //optimizer->init(tl1->getParameters());
	    optimizer->setMaximumNumberOfEvaluations(1000);
	    optimizer->optimize();
	    delete  parameters;
	    delete optimizer;       
	
	  }
		*/
	if (optimize_bls)
	  {
	    //Newton..
	    ParameterList * parameters= new ParameterList();
	    parameters->addParameters( tl1->getBranchLengthsParameters());
	    parameters->addParameters( tl1->getRateDistributionParameters());
	    OptimizationTools::optimizeNumericalParameters(
									     dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>  (tl1),
									     //tl1->getParameters(),
									     *parameters,
									     0,
									     1,
									     tolerance,
									     1000,
									     0,
									     0,
									     false,
									     0,
									     OptimizationTools::OPTIMIZATION_NEWTON,
									     //OptimizationTools::OPTIMIZATION_BRENT);
									     OptimizationTools::OPTIMIZATION_BFGS);
	
	    delete parameters;
	      }
	scalar_type LL=- tl1->getValue(); //Here's your log likelihood value !

	delete sites;
	delete alphabet;
	delete model;
	delete rDist;
	delete tl1;
	return 	LL;
}
void AbstractWordSubstitutionModel::updateMatrices()
{
  // First we update position specific models. This need to be done
  // here and not in fireParameterChanged, as some parameter aliases
  // might have been defined and need to be resolved first.
  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
    VSubMod_[0]->matchParametersValues(getParameters());
  else
    for (size_t i = 0; i < VSubMod_.size(); i++)
    {
      VSubMod_[i]->matchParametersValues(getParameters());
    }

  size_t nbmod = VSubMod_.size();
  size_t salph = getNumberOfStates();
  size_t nbStop = 0;
  vector<bool> vnull; // vector of the indices of lines with only zeros

  // Generator

  size_t i, j, n, l, k, m;

  vector<size_t> vsize;

  for (k = 0; k < nbmod; k++)
  {
    vsize.push_back(VSubMod_[k]->getNumberOfStates());
  }

  RowMatrix<double> gk, exch;

  m = 1;
  
  for (k = nbmod; k > 0; k--)
  {
    gk = VSubMod_[k - 1]->getGenerator();
    for (i = 0; i < vsize[k - 1]; i++)
    {
      for (j = 0; j < vsize[k - 1]; j++)
      {
        if (i != j)
        {
          n = 0;
          while (n < salph)
          { // loop on prefix
            for (l = 0; l < m; l++)
            { // loop on suffix
              generator_(n + i * m + l, n + j * m + l) = gk(i, j) * Vrate_[k - 1];
            }
            n += m * vsize[k - 1];
          }
        }
      }
    }
    m *= vsize[k - 1];
  }

  // modification of generator_

  this->completeMatrices();

  double x;

  for (i = 0; i < salph; i++)
  {
    x = 0;
    for (j = 0; j < salph; j++)
    {
      if (j != i)
        x += generator_(i, j);
    }
    generator_(i, i) = -x;
  }

  // at that point generator_ and freq_ are done for models without
  // enableEigenDecomposition

  // Eigen values:
  
  if (enableEigenDecomposition())
  {
    for (i = 0; i < salph; i++)
    {
      bool flag = true;
      for (j = 0; j < salph; j++)
      {
        if ((i != j) && abs(generator_(i, j)) > NumConstants::TINY())
        {
          flag = false;
          break;
        }
      }
      if (flag)
        nbStop++;
      vnull.push_back(flag);
    }

    if (nbStop != 0)
    {
      size_t gi = 0, gj = 0;

      gk.resize(salph - nbStop, salph - nbStop);
      for (i = 0; i < salph; i++)
      {
        if (!vnull[i])
        {
          gj = 0;
          for (j = 0; j < salph; j++)
          {
            if (!vnull[j])
            {
              gk(i - gi, j - gj) = generator_(i, j);
            }
            else
              gj++;
          }
        }
        else
          gi++;
      }

      EigenValue<double> ev(gk);
      eigenValues_ = ev.getRealEigenValues();
      iEigenValues_ = ev.getImagEigenValues();

      for (i = 0; i < nbStop; i++)
      {
        eigenValues_.push_back(0);
        iEigenValues_.push_back(0);
      }

      RowMatrix<double> rev = ev.getV();
      rightEigenVectors_.resize(salph, salph);
      gi = 0;
      for (i = 0; i < salph; i++)
      {
        if (vnull[i])
        {
          gi++;
          for (j = 0; j < salph; j++)
          {
            rightEigenVectors_(i, j) = 0;
          }

          rightEigenVectors_(i, salph - nbStop + gi - 1) = 1;
        }
        else
        {
          for (j = 0; j < salph - nbStop; j++)
          {
            rightEigenVectors_(i, j) = rev(i - gi, j);
          }

          for (j = salph - nbStop; j < salph; j++)
          {
            rightEigenVectors_(i, j) = 0;
          }
        }
      }
    }
    else
    {
      EigenValue<double> ev(generator_);
      eigenValues_ = ev.getRealEigenValues();
      iEigenValues_ = ev.getImagEigenValues();
      rightEigenVectors_ = ev.getV();
      nbStop = 0;
    }

    try
    {
      MatrixTools::inv(rightEigenVectors_, leftEigenVectors_);

      // is it diagonalizable ?

      isDiagonalizable_ = true;
      for (i = 0; i < size_ && isDiagonalizable_; i++)
      {
        if (abs(iEigenValues_[i]) > NumConstants::SMALL())
          isDiagonalizable_ = false;
      }

      // is it singular?

      // looking for the 0 eigenvector for which the non-stop right
      // eigen vector elements are equal.
      //

      size_t nulleigen = 0;
      double val;

      isNonSingular_ = false;
      while (nulleigen < salph - nbStop)
      {
        if ((abs(eigenValues_[nulleigen]) < NumConstants::SMALL()) && (abs(iEigenValues_[nulleigen]) < NumConstants::SMALL()))
        {
          i = 0;
          while (vnull[i])
            i++;
          
          val = rightEigenVectors_(i, nulleigen);
          i++;
          while (i < salph)
          {
            if (!vnull[i])
            {
              if (abs(rightEigenVectors_(i, nulleigen) - val) > NumConstants::SMALL())
                break;
            }
            i++;
          }
          
          if (i < salph)
            nulleigen++;
          else
          {
            isNonSingular_ = true;
            break;
          }
        }
        else
          nulleigen++;
      }
      
      if (isNonSingular_)
      {
        eigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
        iEigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
        
        for (i = 0; i < salph; i++)
          freq_[i] = leftEigenVectors_(nulleigen, i);
        
        x = 0;
        for (i = 0; i < salph; i++)
            x += freq_[i];
        
        for (i = 0; i < salph; i++)
          freq_[i] /= x;
      }
      
      else
      {
        ApplicationTools::displayMessage("Unable to find eigenvector for eigenvalue 1. Taylor series used instead.");
        isDiagonalizable_ = false;
      }
    }
    
    
    // if rightEigenVectors_ is singular
    catch (ZeroDivisionException& e)
    {
      ApplicationTools::displayMessage("Singularity during  diagonalization. Taylor series used instead.");
      isNonSingular_ = false;
      isDiagonalizable_ = false;
    }

    if (!isNonSingular_)
    {
      double min = generator_(0, 0);
      for (i = 1; i < salph; i++)
      {
        if (min > generator_(i, i))
          min = generator_(i, i);
      }

      MatrixTools::scale(generator_, -1 / min);

      if (vPowGen_.size() == 0)
        vPowGen_.resize(30);

      MatrixTools::getId(salph, tmpMat_);    // to compute the equilibrium frequency  (Q+Id)^256
      MatrixTools::add(tmpMat_, generator_);
      MatrixTools::pow(tmpMat_, 256, vPowGen_[0]);

      for (i = 0; i < salph; i++)
      {
        freq_[i] = vPowGen_[0](0, i);
      }

      MatrixTools::getId(salph, vPowGen_[0]);
    }

    // normalization

    x = 0;
    for (i = 0; i < salph; i++)
      x += freq_[i] * generator_(i, i);

    MatrixTools::scale(generator_, -1. / x);
    for (i = 0; i < salph; i++)
    {
      eigenValues_[i] /= -x;
      iEigenValues_[i] /= -x;
    }

    if (!isNonSingular_)
      MatrixTools::Taylor(generator_, 30, vPowGen_);
  }
  else  // compute freq_ is no eigenDecomposition
  {
    for (j = 0; j < size_; j++)
      freq_[j] = 1;
  
    m = 1;
    for (k = nbmod; k > 0; k--)
    {
      SubstitutionModel* pSM = VSubMod_[k - 1];
      for (j = 0; j < vsize[k - 1]; j++)
      {
        n = 0;
        while (n < salph)
        { // loop on prefix
          for (l = 0; l < m; l++)
          { // loop on suffix
            freq_[n + j * m + l] *=  pSM->freq(j);
          }
          n += m * vsize[k - 1];
        }
      }
      m *= vsize[k - 1];
    }
  }
  

  // compute the exchangeability_

  for (i = 0; i < size_; i++)
    for (j = 0; j < size_; j++)
      exchangeability_(i, j) = generator_(i, j) / freq_[j];
}
Esempio n. 5
0
int main() {

  TreeTemplate<Node>* tree = TreeTemplateTools::parenthesisToTree("(((A:0.1, B:0.2):0.3,C:0.1):0.2,(D:0.3,(E:0.2,F:0.05):0.1):0.1);");

  vector<string> seqNames= tree->getLeavesNames();
  vector<int> ids = tree->getNodesId();
  //-------------

  const NucleicAlphabet* alphabet = &AlphabetTools::DNA_ALPHABET;
  FrequenciesSet* rootFreqs = new GCFrequenciesSet(alphabet);
  
  SubstitutionModel* model = new T92(alphabet, 3.);
  std::vector<std::string> globalParameterNames;
  globalParameterNames.push_back("T92.kappa");

  //Very difficult to optimize on small datasets:
  DiscreteDistribution* rdist = new GammaDiscreteRateDistribution(4, 1.0);
  
  ParametrizableTree* parTree = new ParametrizableTree(*tree);
  FrequenciesSet* rootFreqs2 = rootFreqs->clone();
  DiscreteDistribution* rdist2 = rdist->clone();
  SubstitutionModel* model2=model->clone();

  map<string, string> alias;

  SubstitutionModelSet* modelSet = SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, alias, globalParameterNames);
  unique_ptr<SubstitutionModelSet> modelSetSim(modelSet->clone());

  NonHomogeneousSubstitutionProcess* subPro= NonHomogeneousSubstitutionProcess::createNonHomogeneousSubstitutionProcess(model2, rdist2, rootFreqs2, parTree, globalParameterNames);

  // Simulation
    
  size_t nsites = 1000;
  unsigned int nrep = 20;
  size_t nmodels = modelSet->getNumberOfModels();
  vector<double> thetas(nmodels);
  vector<double> thetasEst1(nmodels);
  vector<double> thetasEst2(nmodels);
  vector<double> thetasEst1n(nmodels);
  vector<double> thetasEst2n(nmodels);

  for (size_t i = 0; i < nmodels; ++i) {
    double theta = RandomTools::giveRandomNumberBetweenZeroAndEntry(0.99) + 0.005;
    cout << "Theta" << i << " set to " << theta << endl; 
    modelSetSim->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    //subPro->setParameterValue("T92.theta_" + TextTools::toString(i + 1), theta);
    thetas[i] = theta;
  }

  NonHomogeneousSequenceSimulator simulator(modelSetSim.get(), rdist, tree);

  NonHomogeneousSubstitutionProcess* subPro2 = subPro->clone();

  for (unsigned int j = 0; j < nrep; j++) {

    OutputStream* profiler  = new StlOutputStream(new ofstream("profile.txt", ios::out));
    OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out));

    //Simulate data:
    unique_ptr<SiteContainer> sites(simulator.simulate(nsites));

    //Now fit model:
    unique_ptr<SubstitutionModelSet> modelSet2(modelSet->clone());

    RNonHomogeneousTreeLikelihood tl(*tree, *sites.get(), modelSet, rdist, true, true, false);
    tl.initialize();

    RNonHomogeneousTreeLikelihood tl2(*tree, *sites.get(), modelSet2.get(), rdist, true, true, true);
    tl2.initialize();

    SubstitutionProcess* nsubPro=subPro->clone();
    SubstitutionProcess* nsubPro2=subPro2->clone();
    
    RecursiveLikelihoodTreeCalculation* tlComp = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro, true, false);
    SingleProcessPhyloLikelihood ntl(nsubPro, tlComp, true);

    RecursiveLikelihoodTreeCalculation* tlComp2 = new RecursiveLikelihoodTreeCalculation(*sites->clone(), nsubPro2, true);
    SingleProcessPhyloLikelihood ntl2(nsubPro2, tlComp2, true);

    for (size_t i = 0; i < nmodels; ++i) {
      ntl.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]);
      ntl2.setParameterValue("T92.theta_" + TextTools::toString(i + 1), thetas[i]);
    }

    cout << setprecision(10) << "OldTL init: "  << tl.getValue() << "\t" << tl2.getValue() << endl;
    cout << setprecision(10) << "NewTL init: "  << ntl.getValue() << "\t" << ntl2.getValue() << endl;

    unsigned int c1 = OptimizationTools::optimizeNumericalParameters2(
      &tl, tl.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);
    
    
    unsigned int c2 = OptimizationTools::optimizeNumericalParameters2(
      &tl2, tl2.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    unsigned int nc1 = OptimizationTools::optimizeNumericalParameters2(
      &ntl, ntl.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    unsigned int nc2 = OptimizationTools::optimizeNumericalParameters2(
      &ntl2, ntl2.getParameters(), 0,
      0.0001, 10000, messenger, profiler, false, false, 1, OptimizationTools::OPTIMIZATION_NEWTON);

    cout << "OldTL: " << c1 << ": " << tl.getValue() << "\t" << c2 << ": " << tl2.getValue() << endl;
    cout << "NewTL: " << nc1 << ": " << ntl.getValue() << "\t" << nc2 << ": " << ntl2.getValue() << endl;

    cout << "Thetas : " << endl;

    for (size_t i = 0; i < nmodels; ++i) {
      //    cerr << modelSet->getModel(i)->getParameter("theta").getValue() << "\t" << modelSet2->getModel(i)->getParameter("theta").getValue();
      
      //      cerr << "\t"  << subPro->getModel(i)->getParameter("theta").getValue() << "\t" << subPro2->getModel(i)->getParameter("theta").getValue() << endl;
      // if (abs(modelSet2->getModel(i)->getParameter("theta").getValue() - modelSet3->getModel(i)->getParameter("theta").getValue()) > 0.1)
      //   return 1;
      thetasEst1[i] +=  modelSet->getModel(i)->getParameter("theta").getValue();
      thetasEst2[i] +=  modelSet2->getModel(i)->getParameter("theta").getValue();
      thetasEst1n[i] +=  dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro)->getModel(i)->getParameter("theta").getValue();
      thetasEst2n[i] +=  dynamic_cast< NonHomogeneousSubstitutionProcess*>(nsubPro2)->getModel(i)->getParameter("theta").getValue();
    }
  }
  thetasEst1 /= static_cast<double>(nrep);
  thetasEst2 /= static_cast<double>(nrep);
  thetasEst1n /= static_cast<double>(nrep);
  thetasEst2n /= static_cast<double>(nrep);

  //Now compare estimated values to real ones:
  cout << "Real" << "\t" << "Est_Old1" << "\t" << "Est_Old2" << "\t";
  cout << "Est_New1" << "\t" << "Est_New2" << endl;
  for (size_t i = 0; i < thetas.size(); ++i) {
    cout << thetas[i] << "\t" << thetasEst1[i] << "\t" << thetasEst2[i] << "\t";
    cout << thetasEst1n[i] << "\t" << thetasEst2n[i] << endl;
     double diff1 = abs(thetas[i] - thetasEst1[i]);
     double diff2 = abs(thetas[i] - thetasEst2[i]);
     double diffn1 = abs(thetas[i] - thetasEst1n[i]);
     double diffn2 = abs(thetas[i] - thetasEst2n[i]);
     if (diff1 > 0.2 || diff2 > 0.2 || diffn1 > 0.2 || diffn2 > 0.2)
       return 1;
  }

  return 0;
}
Esempio n. 6
0
int main(int args, char ** argv)
{
  cout << "******************************************************************" << endl;
  cout << "*              Bio++ Distance Methods, version 2.2.0             *" << endl;
  cout << "* Author: J. Dutheil                        Created     05/05/07 *" << endl;
  cout << "*                                           Last Modif. 04/02/15 *" << endl;
  cout << "******************************************************************" << endl;
  cout << endl;

  if(args == 1)
  {
    help();
    return 0;
  }
  
  try {

  BppApplication bppdist(args, argv, "BppDist");
  bppdist.startTimer();

  Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false);
  auto_ptr<GeneticCode> gCode;
  CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
  if (codonAlphabet) {
    string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true);
    ApplicationTools::displayResult("Genetic Code", codeDesc);

    gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
  }

  VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams());
  
  VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(* allSites, bppdist.getParams());
  delete allSites;

  ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
  ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));
  
  SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams());
  
	DiscreteDistribution* rDist = 0;
  if (model->getNumberOfStates() > model->getAlphabet()->getSize())
  {
    //Markov-modulated Markov model!
    rDist = new ConstantRateDistribution();
  }
  else
  {
	  rDist = PhylogeneticsApplicationTools::getRateDistribution(bppdist.getParams());
  }
   
  DistanceEstimation distEstimation(model, rDist, sites, 1, false);
 
  string method = ApplicationTools::getStringParameter("method", bppdist.getParams(), "nj");
  ApplicationTools::displayResult("Tree reconstruction method", method);
  TreeTemplate<Node>* tree;
  AgglomerativeDistanceMethod* distMethod = 0;
  if(method == "wpgma")
  {
    PGMA* wpgma = new PGMA(true);
    distMethod = wpgma;
  }
  else if(method == "upgma")
  {
    PGMA* upgma = new PGMA(false);
    distMethod = upgma;
  }
  else if(method == "nj")
  {
    NeighborJoining* nj = new NeighborJoining();
    nj->outputPositiveLengths(true);
    distMethod = nj;
  }
  else if(method == "bionj")
  {
    BioNJ* bionj = new BioNJ();
    bionj->outputPositiveLengths(true);
    distMethod = bionj;
  }
  else throw Exception("Unknown tree reconstruction method.");
  
  string type = ApplicationTools::getStringParameter("optimization.method", bppdist.getParams(), "init");
  ApplicationTools::displayResult("Model parameters estimation method", type);
  if (type == "init") type = OptimizationTools::DISTANCEMETHOD_INIT;
  else if (type == "pairwise") type = OptimizationTools::DISTANCEMETHOD_PAIRWISE;
  else if (type == "iterations") type = OptimizationTools::DISTANCEMETHOD_ITERATIONS;
  else throw Exception("Unknown parameter estimation procedure '" + type + "'.");
  
	unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", bppdist.getParams(), 2);
	
	string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false);
	OutputStream* messenger = 
		(mhPath == "none") ? 0 :
			(mhPath == "std") ? ApplicationTools::message :
				new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
	ApplicationTools::displayResult("Message handler", mhPath);

	string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false);
	OutputStream* profiler = 
		(prPath == "none") ? 0 :
			(prPath == "std") ? ApplicationTools::message :
				new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
	if(profiler) profiler->setPrecision(20);
	ApplicationTools::displayResult("Profiler", prPath);

	// Should I ignore some parameters?
  ParameterList allParameters = model->getParameters();
  allParameters.addParameters(rDist->getParameters());
	ParameterList parametersToIgnore;
  string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", bppdist.getParams(), "", "", true, false);
	bool ignoreBrLen = false;
  StringTokenizer st(paramListDesc, ",");
	while (st.hasMoreToken())
  {
		try
    {
      string param = st.nextToken();
      if (param == "BrLen")
        ignoreBrLen = true;
      else
      {
        if (allParameters.hasParameter(param))
        {
          Parameter* p = &allParameters.getParameter(param);
          parametersToIgnore.addParameter(*p);
        }
        else ApplicationTools::displayWarning("Parameter '" + param + "' not found."); 
      }
		} 
    catch (ParameterNotFoundException& pnfe)
    {
			ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
		}
	}
	
	unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", bppdist.getParams(), 1000000);
	ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
	
	double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", bppdist.getParams(), .000001);
	ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
	
  //Here it is:
  ofstream warn("warnings", ios::out);
  ApplicationTools::warning = new StlOutputStreamWrapper(&warn);
  tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
  warn.close();
  delete ApplicationTools::warning;
  ApplicationTools::warning = ApplicationTools::message;

  string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false);
  if (matrixPath != "none")
  {
    ApplicationTools::displayResult("Output matrix file", matrixPath);
    string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false);
    string format = "";
    bool extended = false;
    std::map<std::string, std::string> unparsedArguments_;
    KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_);
    if (unparsedArguments_.find("type") != unparsedArguments_.end())
    {
      if (unparsedArguments_["type"] == "extended")
      {
        extended = true;
      }     
      else if (unparsedArguments_["type"] == "classic")
        extended = false;
      else
        ApplicationTools::displayWarning("Argument '" +
                                         unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
                                         "Default used instead: not extended.");
    }    
    else
      ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended.");
    

    ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended);
    odm->write(*distEstimation.getMatrix(), matrixPath, true);
    delete odm;
  }
  PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());
  
  //Output some parameters:
  if (type == OptimizationTools::DISTANCEMETHOD_ITERATIONS)
  {
    // Write parameters to screen:
    ParameterList parameters = model->getParameters();
    for (unsigned int i = 0; i < parameters.size(); i++)
    {
		  ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
    }
    parameters = rDist->getParameters();
    for (unsigned int i = 0; i < parameters.size(); i++)
    {
		  ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
    }
    // Write parameters to file:
	  string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppdist.getParams(), false, false);
    if (parametersFile != "none")
    {
		  ofstream out(parametersFile.c_str(), ios::out);
      parameters = model->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
      }
      parameters = rDist->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
      }
      out.close();
    }
  }
 
  //Bootstrap:
  unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppdist.getParams(), 0);
  if(nbBS > 0)
  {
    ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
    bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppdist.getParams(), true);
    ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no"));
    if(approx)
    {
      type = OptimizationTools::DISTANCEMETHOD_INIT;
      parametersToIgnore = allParameters;
      ignoreBrLen = true;
    }
    bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppdist.getParams(), false, "", true, false);
 
    string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bppdist.getParams(), false, false);
    ofstream *out = NULL;
    if(bsTreesPath != "none")
    {
      ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath);
      out = new ofstream(bsTreesPath.c_str(), ios::out);
    }
    Newick newick;
    
    vector<Tree *> bsTrees(nbBS);
    ApplicationTools::displayTask("Bootstrapping", true);
    for(unsigned int i = 0; i < nbBS; i++)
    {
      ApplicationTools::displayGauge(i, nbBS-1, '=');
      VectorSiteContainer * sample = SiteContainerTools::bootstrapSites(*sites);
      if(approx) model->setFreqFromData(*sample);
      distEstimation.setData(sample);
      bsTrees[i] = OptimizationTools::buildDistanceTree(
          distEstimation,
          *distMethod,
          parametersToIgnore,
          ignoreBrLen,
          type,
          tolerance,
          nbEvalMax,
          NULL,
          NULL,
          (bootstrapVerbose ? 1 : 0)
        );
      if(out && i == 0) newick.write(*bsTrees[i], bsTreesPath, true);
      if(out && i >  0) newick.write(*bsTrees[i], bsTreesPath, false);
      delete sample;
    }
    if(out) out->close();
    if(out) delete out;
    ApplicationTools::displayTaskDone();
    ApplicationTools::displayTask("Compute bootstrap values");
    TreeTools::computeBootstrapValues(*tree, bsTrees);
    ApplicationTools::displayTaskDone();
    for(unsigned int i = 0; i < nbBS; i++) delete bsTrees[i];

    //Write resulting tree:
    PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());
  }
    
  delete alphabet;
  delete sites;
  delete distMethod;
  delete tree;

  bppdist.done();}
  
      
  catch(exception & e)
  {
    cout << e.what() << endl;
    return 1;
  }

  return 0;
}