Ejemplo n.º 1
0
// Computes the log-likelihood of a tree on an RNA alignment, using a GTR
// substitution model and a GammaDiscreteDistribution(8, 1, 1) for rates
// across sites.
//
// tree          : tree in parenthesis notation, parsed with
//                 TreeTemplateTools::parenthesisToTree(tree, false, "ID").
// aln_filename  : path of the alignment, read with the Fasta reader.
// optimize_bls  : when true, branch-length and rate-distribution parameters
//                 are numerically optimized before the likelihood is read.
// tolerance     : tolerance forwarded to
//                 OptimizationTools::optimizeNumericalParameters.
//
// Returns a pair holding the parenthesis string of the tree stored in the
// likelihood object (after the optional optimization) and the log-likelihood.
pair<string,scalar_type> tree_LL_nucl(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
	const Alphabet* alphabet = new RNA();
	Fasta Reader;
	OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);
	VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
	SiteContainerTools::removeGapOnlySites(*sites);
	SiteContainerTools::changeGapsToUnknownCharacters(*sites);

	TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

	SubstitutionModel* model = new GTR(&AlphabetTools::RNA_ALPHABET);
	model->setFreqFromData(*sites);
	DiscreteDistribution* rDist = new GammaDiscreteDistribution(8, 1, 1);

	DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
		new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
	tl1->initialize();

	if (optimize_bls)
	{
		// Only branch lengths and rate-distribution parameters are optimized;
		// the substitution model parameters are left untouched.
		// The list lives on the stack so it is released even if the optimizer throws.
		ParameterList parameters;
		parameters.addParameters(tl1->getBranchLengthsParameters());
		parameters.addParameters(tl1->getRateDistributionParameters());
		OptimizationTools::optimizeNumericalParameters(
			tl1,          // already a DiscreteRatesAcrossSitesTreeLikelihood*, no cast needed
			parameters,
			0,
			1,
			tolerance,
			1000,
			0,
			0,
			false,
			0,
			OptimizationTools::OPTIMIZATION_NEWTON,
			OptimizationTools::OPTIMIZATION_BFGS);
	}

	// getValue() is negated to obtain the log-likelihood.
	pair<string,scalar_type> return_pair;
	return_pair.first = TreeTemplateTools::treeToParenthesis(tl1->getTree());
	return_pair.second = -tl1->getValue();

	// Tear down in reverse dependency order: the likelihood object first (it
	// still points at model and rDist), then everything it was built from.
	// The parsed tree and the raw alignment were previously never released.
	delete tl1;
	delete ttree1;
	delete model;
	delete rDist;
	delete sites;
	delete alignment;
	delete alphabet;
	return return_pair;
}
Ejemplo n.º 2
0
// Builds a new Fasta holding every entry of `repertoire` whose label
// contains `name` as a substring. Entries are visited in index order and
// `repertoire` itself is not modified by this function's own statements.
Fasta extractInterestingGenes(Fasta &repertoire, string name) {
  Fasta matches;

  const int total = repertoire.size();
  int idx = 0;
  while (idx < total) {
    const bool label_matches = repertoire.label(idx).find(name) != string::npos;
    if (label_matches)
      matches.add(repertoire.read(idx));
    ++idx;
  }

  return matches;
}
Ejemplo n.º 3
0
// Computes the log-likelihood of a tree on a protein alignment, using an LG08
// substitution model (with a FullProteinFrequenciesSet whose frequencies are
// set from the data) and a GammaDiscreteDistribution(4, 1, 1) for rates
// across sites.
//
// tree          : tree in parenthesis notation, parsed with
//                 TreeTemplateTools::parenthesisToTree(tree, false, "ID").
// aln_filename  : path of the alignment, read with the Fasta reader.
// optimize_bls  : when true, branch-length and rate-distribution parameters
//                 are numerically optimized before the likelihood is read.
// tolerance     : tolerance forwarded to
//                 OptimizationTools::optimizeNumericalParameters.
//
// Returns the log-likelihood (the negated value of the likelihood object).
scalar_type tree_LL(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
	const Alphabet* alphabet = new ProteicAlphabet();
	Fasta Reader;
	OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);
	VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
	SiteContainerTools::changeGapsToUnknownCharacters(*sites);

	TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree, false, "ID");

	SubstitutionModel* model = new LG08(&AlphabetTools::PROTEIN_ALPHABET, new FullProteinFrequenciesSet(&AlphabetTools::PROTEIN_ALPHABET), true);
	model->setFreqFromData(*sites);

	DiscreteDistribution* rDist = new GammaDiscreteDistribution(4, 1, 1);

	DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
		new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
	tl1->initialize();

	if (optimize_bls)
	{
		// Only branch lengths and rate-distribution parameters are optimized;
		// the substitution model parameters are left untouched.
		// The list lives on the stack so it is released even if the optimizer throws.
		ParameterList parameters;
		parameters.addParameters(tl1->getBranchLengthsParameters());
		parameters.addParameters(tl1->getRateDistributionParameters());
		OptimizationTools::optimizeNumericalParameters(
			tl1,          // already a DiscreteRatesAcrossSitesTreeLikelihood*, no cast needed
			parameters,
			0,
			1,
			tolerance,
			1000,
			0,
			0,
			false,
			0,
			OptimizationTools::OPTIMIZATION_NEWTON,
			OptimizationTools::OPTIMIZATION_BFGS);
	}

	// getValue() is negated to obtain the log-likelihood.
	scalar_type LL = -tl1->getValue();

	// Tear down in reverse dependency order: the likelihood object first (it
	// still points at model and rDist), then everything it was built from.
	// The parsed tree and the raw alignment were previously never released.
	delete tl1;
	delete ttree1;
	delete model;
	delete rDist;
	delete sites;
	delete alignment;
	delete alphabet;
	return LL;
}
Ejemplo n.º 4
0
// Aligns `read` against every sequence of `rep` (except index
// `forbidden_rep_id`) with DynProg and records the best-scoring hit in `box`.
//
// read             : query sequence; passed through revcomp(read, reverse_ref)
//                    before alignment.
// rep              : collection of reference sequences.
// forbidden_rep_id : index in `rep` that is skipped.
// reverse_ref      : forwarded to revcomp(); also triggers the final
//                    remapping of the end position onto `read`.
// reverse_both     : forwarded twice to the DynProg constructor and selects
//                    how box->del_right is derived.
// local            : selects DynProg::Local instead of
//                    DynProg::LocalEndWithSomeDeletions, disables the
//                    bottom-triangle restriction, and makes backtrack() run
//                    for every reference.
// box              : output; ref_nb, ref_label, marked_pos, ref, del_right,
//                    del_left, start, score and end are written.
// segment_cost     : cost passed to DynProg.
void align_against_collection(string &read, Fasta &rep, int forbidden_rep_id,
                              bool reverse_ref, bool reverse_both, bool local,
                             AlignBox *box, Cost segment_cost)
{
  
  // Sentinels: no reference selected yet, no positions known yet.
  int best_score = MINUS_INF ;
  box->ref_nb = MINUS_INF ;
  int best_best_i = (int) string::npos ;
  int best_best_j = (int) string::npos ;
  int best_first_i = (int) string::npos ;
  int best_first_j = (int) string::npos ;

  // (score, reference index) for every reference that was aligned.
  vector<pair<int, int> > score_r;

  DynProg::DynProgMode dpMode = DynProg::LocalEndWithSomeDeletions;
  if (local==true) dpMode = DynProg::Local;

  // With reverse_ref, the read is reversed to prevent calling revcomp on each reference sequence
  string sequence_or_rc = revcomp(read, reverse_ref);
  
  for (int r = 0 ; r < rep.size() ; r++)
    {
      if (r == forbidden_rep_id)
        continue;

      DynProg dp = DynProg(sequence_or_rc, rep.sequence(r),
			   dpMode, // DynProg::SemiGlobalTrans, 
			   segment_cost, // DNA
			   reverse_both, reverse_both,
                          rep.read(r).marked_pos);

      bool onlyBottomTriangle = !local ;
      int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT);
      
      // In local mode backtrack() runs for every reference, before the
      // dp.first_i / dp.first_j values are read below.
      if (local==true){ 
	dp.backtrack();
      }
      
      // Strict comparison: on equal scores the earliest reference is kept.
      if (score > best_score)
	{
	  best_score = score ;
	  best_best_i = dp.best_i ;
	  best_best_j = dp.best_j ;
	  best_first_i = dp.first_i ;
	  best_first_j = dp.first_j ;
	  box->ref_nb = r ;
	  box->ref_label = rep.label(r) ;

          // In non-local mode backtrack() only runs for a new best hit, and
          // only after first_i / first_j have been copied above.
          // NOTE(review): presumably first_i/first_j do not depend on
          // backtrack() in that mode -- confirm against DynProg.
          if (!local)
            dp.backtrack();
          box->marked_pos = dp.marked_pos_i ;
	}
	
	score_r.push_back(make_pair(score, r));

	// #define DEBUG_SEGMENT      

#ifdef DEBUG_SEGMENT	
	cout << rep.label(r) << " " << score << " " << dp.best_i << endl ;
#endif

    }
    // Ordering is decided by comp_pair, which is defined elsewhere.
    sort(score_r.begin(),score_r.end(),comp_pair);

  // NOTE(review): if no reference was scored (empty `rep`, or only the
  // forbidden entry), box->ref_nb is still MINUS_INF here -- confirm that
  // callers always provide at least one candidate.
  box->ref = rep.sequence(box->ref_nb);
  box->del_right = reverse_both ? best_best_j : box->ref.size() - best_best_j - 1;
  box->del_left = best_first_j;
  box->start = best_first_i;
  
  box->score = score_r;

#ifdef DEBUG_SEGMENT	
  cout << "best: " << box->ref_label << " " << best_score ;
  cout << "del/del2/begin:" << (box->del_right) << "/" << (box->del_left) << "/" << (box->start) << endl;
  cout << endl;
#endif

  // The alignment was done on the revcomp'ed read when reverse_ref is set, so
  // the end position is mapped back onto the coordinates of `read`.
  if (reverse_ref)
    // Why -1 here and +1 in dynprog.cpp /// best_i = m - best_i + 1 ;
    best_best_i = read.length() - best_best_i - 1 ;

  box->end = best_best_i ;
}