示例#1
0
/**
 * Compute the log-likelihood of a tree on a nucleotide alignment (RNA alphabet).
 *
 * The alignment is read with the Fasta reader, gap-only sites are removed and
 * the remaining gaps are recoded as unknown characters. The likelihood is
 * evaluated with a GTR model whose frequencies are set from the data and a
 * GammaDiscreteDistribution(8, 1, 1) rate distribution.
 *
 * @param tree          Tree description handed to TreeTemplateTools::parenthesisToTree().
 * @param aln_filename  Path of the alignment file read by the Fasta reader.
 * @param optimize_bls  When true, branch-length and rate-distribution parameters
 *                      are numerically optimized before the likelihood is read.
 * @param tolerance     Tolerance forwarded to the optimizer.
 * @return Pair made of the tree held by the likelihood object (parenthesis
 *         notation) and the negated value of getValue(), i.e. the log-likelihood.
 */
pair<string,scalar_type> tree_LL_nucl(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  // NOTE(review): the sequences are read with a freshly allocated RNA alphabet
  // while the model below uses the shared AlphabetTools::RNA_ALPHABET instance.
  const Alphabet* alphabet = new RNA();
  Fasta Reader;
  OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);

  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::removeGapOnlySites(*sites);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

  SubstitutionModel* model = new GTR(&AlphabetTools::RNA_ALPHABET);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(8, 1, 1);

  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  if (optimize_bls)
  {
    // Only branch lengths and rate-distribution parameters are optimized;
    // the substitution-model parameters are left untouched.
    ParameterList parameters;
    parameters.addParameters(tl1->getBranchLengthsParameters());
    parameters.addParameters(tl1->getRateDistributionParameters());
    OptimizationTools::optimizeNumericalParameters(
      tl1,
      parameters,
      0,
      1,
      tolerance,
      1000,
      0,
      0,
      false,
      0,
      OptimizationTools::OPTIMIZATION_NEWTON,
      OptimizationTools::OPTIMIZATION_BFGS);
  }

  pair<string,scalar_type> return_pair;
  return_pair.first  = TreeTemplateTools::treeToParenthesis(tl1->getTree());
  return_pair.second = -tl1->getValue(); // getValue() is negated to obtain the log-likelihood

  // Release everything in reverse dependency order so that no object is
  // destroyed while another one may still point to it.
  // NOTE(review): assumes the likelihood object does not take ownership of
  // model/rDist (the original code deleted them itself) and works on its own
  // copy of the tree passed by reference -- confirm for the Bio++ version used.
  delete tl1;
  delete rDist;
  delete model;
  delete ttree1;    // was leaked before
  delete sites;
  delete alignment; // was leaked before; assumed caller-owned result of Reader.read()
  delete alphabet;
  return return_pair;
}
示例#2
0
/**
 * Compute the log-likelihood of a tree on a protein alignment.
 *
 * The alignment is read with the Fasta reader and its gaps are recoded as
 * unknown characters. The likelihood is evaluated with an LG08 model (full
 * protein frequency set, frequencies then set from the data) and a
 * GammaDiscreteDistribution(4, 1, 1) rate distribution.
 *
 * @param tree          Tree description handed to TreeTemplateTools::parenthesisToTree().
 * @param aln_filename  Path of the alignment file read by the Fasta reader.
 * @param optimize_bls  When true, branch-length and rate-distribution parameters
 *                      are numerically optimized before the likelihood is read.
 * @param tolerance     Tolerance forwarded to the optimizer.
 * @return The negated value of getValue() of the likelihood object, i.e. the
 *         log-likelihood.
 */
scalar_type tree_LL(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  const Alphabet* alphabet = new ProteicAlphabet();
  Fasta Reader;
  OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);

  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

  SubstitutionModel* model =
    new LG08(&AlphabetTools::PROTEIN_ALPHABET, new FullProteinFrequenciesSet(&AlphabetTools::PROTEIN_ALPHABET), true);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(4, 1, 1);

  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  // An earlier, commented-out variant drove a PseudoNewtonOptimizer by hand;
  // the OptimizationTools call below is the one actually in use.
  if (optimize_bls)
  {
    // Only branch lengths and rate-distribution parameters are optimized;
    // the substitution-model parameters are left untouched.
    ParameterList parameters;
    parameters.addParameters(tl1->getBranchLengthsParameters());
    parameters.addParameters(tl1->getRateDistributionParameters());
    OptimizationTools::optimizeNumericalParameters(
      tl1,
      parameters,
      0,
      1,
      tolerance,
      1000,
      0,
      0,
      false,
      0,
      OptimizationTools::OPTIMIZATION_NEWTON,
      OptimizationTools::OPTIMIZATION_BFGS);
  }

  scalar_type LL = -tl1->getValue(); // getValue() is negated to obtain the log-likelihood

  // Release everything in reverse dependency order so that no object is
  // destroyed while another one may still point to it.
  // NOTE(review): assumes the likelihood object does not take ownership of
  // model/rDist (the original code deleted them itself) and works on its own
  // copy of the tree passed by reference -- confirm for the Bio++ version used.
  delete tl1;
  delete rDist;
  delete model;
  delete ttree1;    // was leaked before
  delete sites;
  delete alignment; // was leaked before; assumed caller-owned result of Reader.read()
  delete alphabet;
  return LL;
}
示例#3
0
/**
 * Entry point of the Bio++ distance-methods program.
 *
 * Steps, all driven by the options held by the BppApplication object:
 *  - load the alphabet (plus a genetic code for codon alphabets) and the sites;
 *  - build the substitution model and the rate distribution;
 *  - build a distance tree with the selected method (wpgma, upgma, nj, bionj);
 *  - optionally write the distance matrix, the tree and the estimated parameters;
 *  - optionally run a bootstrap analysis and write the annotated tree.
 *
 * @param args  Number of command-line arguments; with only the program name
 *              the help is printed.
 * @param argv  Command-line arguments.
 * @return 0 on success or after printing the help, 1 if an exception was caught.
 */
int main(int args, char ** argv)
{
  cout << "******************************************************************" << endl;
  cout << "*              Bio++ Distance Methods, version 2.2.0             *" << endl;
  cout << "* Author: J. Dutheil                        Created     05/05/07 *" << endl;
  cout << "*                                           Last Modif. 04/02/15 *" << endl;
  cout << "******************************************************************" << endl;
  cout << endl;

  // No argument given: print the usage and leave.
  if (args == 1)
  {
    help();
    return 0;
  }

  try
  {
    BppApplication bppdist(args, argv, "BppDist");
    bppdist.startTimer();

    // Alphabet, and genetic code when the alphabet is a codon alphabet.
    Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppdist.getParams(), "", false);
    auto_ptr<GeneticCode> gCode;
    CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet);
    if (codonAlphabet)
    {
      string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppdist.getParams(), "Standard", "", true, true);
      ApplicationTools::displayResult("Genetic Code", codeDesc);

      gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc));
    }

    // Input data: the full container is only needed to extract the analysed sites.
    VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppdist.getParams());
    VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(*allSites, bppdist.getParams());
    delete allSites;

    ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences()));
    ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites()));

    SubstitutionModel* model = PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppdist.getParams());

    DiscreteDistribution* rDist = 0;
    if (model->getNumberOfStates() > model->getAlphabet()->getSize())
    {
      // Markov-modulated Markov model!
      rDist = new ConstantRateDistribution();
    }
    else
    {
      rDist = PhylogeneticsApplicationTools::getRateDistribution(bppdist.getParams());
    }

    // NOTE(review): model and rDist are never deleted in this function;
    // presumably DistanceEstimation takes ownership of both -- confirm.
    DistanceEstimation distEstimation(model, rDist, sites, 1, false);

    // Tree reconstruction method.
    string method = ApplicationTools::getStringParameter("method", bppdist.getParams(), "nj");
    ApplicationTools::displayResult("Tree reconstruction method", method);
    TreeTemplate<Node>* tree;
    AgglomerativeDistanceMethod* distMethod = 0;
    if (method == "wpgma")
    {
      PGMA* wpgma = new PGMA(true);
      distMethod = wpgma;
    }
    else if (method == "upgma")
    {
      PGMA* upgma = new PGMA(false);
      distMethod = upgma;
    }
    else if (method == "nj")
    {
      NeighborJoining* nj = new NeighborJoining();
      nj->outputPositiveLengths(true);
      distMethod = nj;
    }
    else if (method == "bionj")
    {
      BioNJ* bionj = new BioNJ();
      bionj->outputPositiveLengths(true);
      distMethod = bionj;
    }
    else throw Exception("Unknown tree reconstruction method.");

    // Parameter estimation procedure: the user keyword is mapped onto the
    // corresponding OptimizationTools constant.
    string type = ApplicationTools::getStringParameter("optimization.method", bppdist.getParams(), "init");
    ApplicationTools::displayResult("Model parameters estimation method", type);
    if (type == "init") type = OptimizationTools::DISTANCEMETHOD_INIT;
    else if (type == "pairwise") type = OptimizationTools::DISTANCEMETHOD_PAIRWISE;
    else if (type == "iterations") type = OptimizationTools::DISTANCEMETHOD_ITERATIONS;
    else throw Exception("Unknown parameter estimation procedure '" + type + "'.");

    unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", bppdist.getParams(), 2);

    // Message handler: "none" disables it, "std" reuses ApplicationTools::message,
    // anything else is taken as the path of a file to write to.
    string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", bppdist.getParams(), false, false);
    OutputStream* messenger =
      (mhPath == "none") ? 0 :
        (mhPath == "std") ? ApplicationTools::message :
          new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
    ApplicationTools::displayResult("Message handler", mhPath);

    // Profiler: same conventions as the message handler.
    string prPath = ApplicationTools::getAFilePath("optimization.profiler", bppdist.getParams(), false, false);
    OutputStream* profiler =
      (prPath == "none") ? 0 :
        (prPath == "std") ? ApplicationTools::message :
          new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
    if (profiler) profiler->setPrecision(20);
    ApplicationTools::displayResult("Profiler", prPath);

    // Should I ignore some parameters?
    ParameterList allParameters = model->getParameters();
    allParameters.addParameters(rDist->getParameters());
    ParameterList parametersToIgnore;
    string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameter", bppdist.getParams(), "", "", true, false);
    bool ignoreBrLen = false;
    StringTokenizer st(paramListDesc, ",");
    while (st.hasMoreToken())
    {
      try
      {
        string param = st.nextToken();
        if (param == "BrLen")
          ignoreBrLen = true; // "BrLen" is a keyword for branch lengths, not a parameter name
        else
        {
          if (allParameters.hasParameter(param))
          {
            Parameter* p = &allParameters.getParameter(param);
            parametersToIgnore.addParameter(*p);
          }
          else ApplicationTools::displayWarning("Parameter '" + param + "' not found.");
        }
      }
      catch (ParameterNotFoundException& pnfe)
      {
        ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
      }
    }

    unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", bppdist.getParams(), 1000000);
    ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));

    double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", bppdist.getParams(), .000001);
    ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));

    // Here it is: warnings are redirected to the file "warnings" while the
    // tree is built, then routed back to the message stream.
    ofstream warn("warnings", ios::out);
    ApplicationTools::warning = new StlOutputStreamWrapper(&warn);
    tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore, !ignoreBrLen, type, tolerance, nbEvalMax, profiler, messenger, optVerbose);
    warn.close();
    delete ApplicationTools::warning;
    ApplicationTools::warning = ApplicationTools::message;

    // Optional output of the distance matrix.
    string matrixPath = ApplicationTools::getAFilePath("output.matrix.file", bppdist.getParams(), false, false, "", false);
    if (matrixPath != "none")
    {
      ApplicationTools::displayResult("Output matrix file", matrixPath);
      string matrixFormat = ApplicationTools::getAFilePath("output.matrix.format", bppdist.getParams(), false, false, "", false);
      // NOTE(review): 'format' is parsed but not used afterwards; the writer
      // below is always created with PHYLIP_FORMAT.
      string format = "";
      bool extended = false;
      std::map<std::string, std::string> unparsedArguments_;
      KeyvalTools::parseProcedure(matrixFormat, format, unparsedArguments_);
      if (unparsedArguments_.find("type") != unparsedArguments_.end())
      {
        if (unparsedArguments_["type"] == "extended")
        {
          extended = true;
        }
        else if (unparsedArguments_["type"] == "classic")
          extended = false;
        else
          ApplicationTools::displayWarning("Argument '" +
                                           unparsedArguments_["type"] + "' for parameter 'Phylip#type' is unknown. " +
                                           "Default used instead: not extended.");
      }
      else
        ApplicationTools::displayWarning("Argument 'Phylip#type' not found. Default used instead: not extended.");

      ODistanceMatrix* odm = IODistanceMatrixFactory().createWriter(IODistanceMatrixFactory::PHYLIP_FORMAT, extended);
      // NOTE(review): if getMatrix() returns a freshly allocated copy, the
      // pointer dereferenced here is never freed -- confirm and delete it if so.
      odm->write(*distEstimation.getMatrix(), matrixPath, true);
      delete odm;
    }
    PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());

    // Output some parameters:
    if (type == OptimizationTools::DISTANCEMETHOD_ITERATIONS)
    {
      // Write parameters to screen:
      ParameterList parameters = model->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
      }
      parameters = rDist->getParameters();
      for (unsigned int i = 0; i < parameters.size(); i++)
      {
        ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
      }
      // Write parameters to file:
      string parametersFile = ApplicationTools::getAFilePath("output.estimates", bppdist.getParams(), false, false);
      if (parametersFile != "none")
      {
        ofstream out(parametersFile.c_str(), ios::out);
        parameters = model->getParameters();
        for (unsigned int i = 0; i < parameters.size(); i++)
        {
          out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
        }
        parameters = rDist->getParameters();
        for (unsigned int i = 0; i < parameters.size(); i++)
        {
          out << parameters[i].getName() << " = " << parameters[i].getValue() << endl;
        }
        out.close();
      }
    }

    // Bootstrap:
    unsigned int nbBS = ApplicationTools::getParameter<unsigned int>("bootstrap.number", bppdist.getParams(), 0);
    if (nbBS > 0)
    {
      ApplicationTools::displayResult("Number of bootstrap samples", TextTools::toString(nbBS));
      bool approx = ApplicationTools::getBooleanParameter("bootstrap.approximate", bppdist.getParams(), true);
      ApplicationTools::displayResult("Use approximate bootstrap", TextTools::toString(approx ? "yes" : "no"));
      if (approx)
      {
        // Approximate bootstrap: nothing is re-estimated on the replicates.
        type = OptimizationTools::DISTANCEMETHOD_INIT;
        parametersToIgnore = allParameters;
        ignoreBrLen = true;
      }
      bool bootstrapVerbose = ApplicationTools::getBooleanParameter("bootstrap.verbose", bppdist.getParams(), false, "", true, false);

      string bsTreesPath = ApplicationTools::getAFilePath("bootstrap.output.file", bppdist.getParams(), false, false);
      // 'out' is only tested for being non-null below; the trees themselves
      // are written by newick.write() using the file path.
      ofstream *out = NULL;
      if (bsTreesPath != "none")
      {
        ApplicationTools::displayResult("Bootstrap trees stored in file", bsTreesPath);
        out = new ofstream(bsTreesPath.c_str(), ios::out);
      }
      Newick newick;

      vector<Tree *> bsTrees(nbBS);
      ApplicationTools::displayTask("Bootstrapping", true);
      for (unsigned int i = 0; i < nbBS; i++)
      {
        ApplicationTools::displayGauge(i, nbBS-1, '=');
        VectorSiteContainer * sample = SiteContainerTools::bootstrapSites(*sites);
        if (approx) model->setFreqFromData(*sample);
        distEstimation.setData(sample);
        // Fix: this argument is passed as '!ignoreBrLen' in the main
        // buildDistanceTree() call above; the bootstrap call used to pass
        // 'ignoreBrLen' itself, i.e. the opposite of what the user asked for.
        bsTrees[i] = OptimizationTools::buildDistanceTree(
            distEstimation,
            *distMethod,
            parametersToIgnore,
            !ignoreBrLen,
            type,
            tolerance,
            nbEvalMax,
            NULL,
            NULL,
            (bootstrapVerbose ? 1 : 0)
          );
        // The first replicate overwrites the file, the following ones append.
        if (out) newick.write(*bsTrees[i], bsTreesPath, i == 0);
        delete sample;
      }
      if (out)
      {
        out->close();
        delete out;
      }
      ApplicationTools::displayTaskDone();
      ApplicationTools::displayTask("Compute bootstrap values");
      TreeTools::computeBootstrapValues(*tree, bsTrees);
      ApplicationTools::displayTaskDone();
      for (unsigned int i = 0; i < nbBS; i++) delete bsTrees[i];

      // Write resulting tree:
      PhylogeneticsApplicationTools::writeTree(*tree, bppdist.getParams());
    }

    // Fix: file-backed message handler / profiler streams were allocated with
    // 'new' above and never released. The shared ApplicationTools::message
    // stream must not be deleted here.
    if (messenger && messenger != ApplicationTools::message) delete messenger;
    if (profiler && profiler != ApplicationTools::message) delete profiler;

    delete alphabet;
    delete sites;
    delete distMethod;
    delete tree;

    bppdist.done();
  }
  catch (const exception& e)
  {
    cout << e.what() << endl;
    return 1;
  }

  return 0;
}