/**
 * Evaluate a tree on an RNA alignment and return the tree together with its score.
 *
 * The alignment is read with the Fasta reader from `aln_filename`; gap-only
 * sites are removed and the remaining gaps are changed to unknown characters.
 * The likelihood object is built with a GTR model (frequencies set from the
 * data) and a GammaDiscreteDistribution constructed with (8, 1, 1).
 *
 * @param tree          tree in parenthesis notation, parsed with
 *                      TreeTemplateTools::parenthesisToTree
 * @param aln_filename  path of the alignment file
 * @param optimize_bls  when true, the branch-length and rate-distribution
 *                      parameters are numerically optimized before the value
 *                      is read
 * @param tolerance     tolerance forwarded to
 *                      OptimizationTools::optimizeNumericalParameters
 * @return pair whose first member is the parenthesis string of the tree held
 *         by the likelihood object and whose second member is the negated
 *         value of the likelihood function (the log-likelihood, according to
 *         the original author's note)
 */
pair<string,scalar_type> tree_LL_nucl(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  //const Alphabet* alphabet = new ProteicAlphabet();
  const Alphabet* alphabet = new RNA();

  // Other readers tried previously: NexusIOSequence, Phylip(true,true,100,true,"\r").
  Fasta Reader;
  OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);

  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::removeGapOnlySites(*sites);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree,false,"ID");

  SubstitutionModel* model = new GTR(&AlphabetTools::RNA_ALPHABET);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(8, 1, 1);

  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  if (optimize_bls)
    {
      // Only branch lengths and rate-distribution parameters are optimized,
      // not the full parameter set (tl1->getParameters()).
      ParameterList parameters;
      parameters.addParameters(tl1->getBranchLengthsParameters());
      parameters.addParameters(tl1->getRateDistributionParameters());

      OptimizationTools::optimizeNumericalParameters(
        tl1,
        parameters,
        0,
        1,
        tolerance,
        1000,
        0,
        0,
        false,
        0,
        OptimizationTools::OPTIMIZATION_NEWTON,
        //OptimizationTools::OPTIMIZATION_BRENT);
        OptimizationTools::OPTIMIZATION_BFGS);
    }

  pair<string,scalar_type> return_pair;
  return_pair.first = TreeTemplateTools::treeToParenthesis(tl1->getTree());
  return_pair.second = - tl1->getValue();

  // Release the likelihood object first: it was handed pointers to `model`
  // and `rDist`, so those must outlive it.
  delete tl1;
  delete model;
  delete rDist;
  // The parsed tree and the raw alignment were previously never freed.
  // NOTE(review): this relies on the likelihood object and the site container
  // working on their own copies, as the Bio++ documentation describes -
  // confirm for the library version in use.
  delete ttree1;
  delete sites;
  delete alignment;
  // The containers refer to the alphabet, so it goes last.
  delete alphabet;

  return return_pair;
}
/**
 * Build a Fasta holding the entries of `repertoire` whose label contains `name`.
 *
 * Entries are visited by increasing index and each matching one is added,
 * via `read(i)`, to a freshly default-constructed Fasta.
 *
 * @param repertoire  collection to scan
 * @param name        substring looked up in each label
 * @return the Fasta containing the matching entries
 */
Fasta extractInterestingGenes(Fasta &repertoire, string name)
{
  Fasta matches;

  // The number of entries is read once, before the scan starts.
  const int total = repertoire.size();

  int idx = 0;
  while (idx < total)
    {
      const bool label_has_name = repertoire.label(idx).find(name) != string::npos;
      if (label_has_name)
        matches.add(repertoire.read(idx));
      ++idx;
    }

  return matches;
}
/**
 * Evaluate a tree on a protein alignment and return its score.
 *
 * The alignment is read with the Fasta reader from `aln_filename` and its gaps
 * are changed to unknown characters. The likelihood object is built with an
 * LG08 model (given a FullProteinFrequenciesSet, frequencies set from the
 * data) and a GammaDiscreteDistribution constructed with (4, 1, 1).
 *
 * @param tree          tree in parenthesis notation, parsed with
 *                      TreeTemplateTools::parenthesisToTree
 * @param aln_filename  path of the alignment file
 * @param optimize_bls  when true, the branch-length and rate-distribution
 *                      parameters are numerically optimized before the value
 *                      is read
 * @param tolerance     tolerance forwarded to
 *                      OptimizationTools::optimizeNumericalParameters
 * @return the negated value of the likelihood function (the log-likelihood,
 *         according to the original author's note)
 */
scalar_type tree_LL(string tree,string aln_filename,bool optimize_bls,scalar_type tolerance)
{
  const Alphabet* alphabet = new ProteicAlphabet();

  // Another reader tried previously: Phylip(true,true,100,true,"\r").
  Fasta Reader;
  OrderedSequenceContainer* alignment = Reader.read(aln_filename, alphabet);

  VectorSiteContainer* sites = new VectorSiteContainer(*alignment);
  SiteContainerTools::changeGapsToUnknownCharacters(*sites);

  // An earlier variant parsed the tree with a Newick reader instead.
  TreeTemplate<Node>* ttree1 = TreeTemplateTools::parenthesisToTree(tree,false,"ID");

  SubstitutionModel* model =
    new LG08(&AlphabetTools::PROTEIN_ALPHABET,
             new FullProteinFrequenciesSet(&AlphabetTools::PROTEIN_ALPHABET),
             true);
  model->setFreqFromData(*sites);
  DiscreteDistribution* rDist = new GammaDiscreteDistribution(4, 1, 1);

  DiscreteRatesAcrossSitesTreeLikelihood* tl1 =
    new RHomogeneousTreeLikelihood(*ttree1, *sites, model, rDist, true, false, false);
  tl1->initialize();

  // A previous variant of the block below drove a PseudoNewtonOptimizer
  // directly (automatic constraints, tolerance 0.01, at most 1000
  // evaluations); OptimizationTools is used instead.
  if (optimize_bls)
    {
      // Only branch lengths and rate-distribution parameters are optimized,
      // not the full parameter set (tl1->getParameters()).
      ParameterList parameters;
      parameters.addParameters(tl1->getBranchLengthsParameters());
      parameters.addParameters(tl1->getRateDistributionParameters());

      OptimizationTools::optimizeNumericalParameters(
        tl1,
        parameters,
        0,
        1,
        tolerance,
        1000,
        0,
        0,
        false,
        0,
        OptimizationTools::OPTIMIZATION_NEWTON,
        //OptimizationTools::OPTIMIZATION_BRENT);
        OptimizationTools::OPTIMIZATION_BFGS);
    }

  scalar_type LL = - tl1->getValue();

  // Release the likelihood object first: it was handed pointers to `model`
  // and `rDist`, so those must outlive it.
  delete tl1;
  delete model;
  delete rDist;
  // The parsed tree and the raw alignment were previously never freed.
  // NOTE(review): this relies on the likelihood object and the site container
  // working on their own copies, as the Bio++ documentation describes -
  // confirm for the library version in use.
  delete ttree1;
  delete sites;
  delete alignment;
  // The containers refer to the alphabet, so it goes last.
  delete alphabet;

  return LL;
}
void align_against_collection(string &read, Fasta &rep, int forbidden_rep_id, bool reverse_ref, bool reverse_both, bool local, AlignBox *box, Cost segment_cost) { int best_score = MINUS_INF ; box->ref_nb = MINUS_INF ; int best_best_i = (int) string::npos ; int best_best_j = (int) string::npos ; int best_first_i = (int) string::npos ; int best_first_j = (int) string::npos ; vector<pair<int, int> > score_r; DynProg::DynProgMode dpMode = DynProg::LocalEndWithSomeDeletions; if (local==true) dpMode = DynProg::Local; // With reverse_ref, the read is reversed to prevent calling revcomp on each reference sequence string sequence_or_rc = revcomp(read, reverse_ref); for (int r = 0 ; r < rep.size() ; r++) { if (r == forbidden_rep_id) continue; DynProg dp = DynProg(sequence_or_rc, rep.sequence(r), dpMode, // DynProg::SemiGlobalTrans, segment_cost, // DNA reverse_both, reverse_both, rep.read(r).marked_pos); bool onlyBottomTriangle = !local ; int score = dp.compute(onlyBottomTriangle, BOTTOM_TRIANGLE_SHIFT); if (local==true){ dp.backtrack(); } if (score > best_score) { best_score = score ; best_best_i = dp.best_i ; best_best_j = dp.best_j ; best_first_i = dp.first_i ; best_first_j = dp.first_j ; box->ref_nb = r ; box->ref_label = rep.label(r) ; if (!local) dp.backtrack(); box->marked_pos = dp.marked_pos_i ; } score_r.push_back(make_pair(score, r)); // #define DEBUG_SEGMENT #ifdef DEBUG_SEGMENT cout << rep.label(r) << " " << score << " " << dp.best_i << endl ; #endif } sort(score_r.begin(),score_r.end(),comp_pair); box->ref = rep.sequence(box->ref_nb); box->del_right = reverse_both ? 
best_best_j : box->ref.size() - best_best_j - 1; box->del_left = best_first_j; box->start = best_first_i; box->score = score_r; #ifdef DEBUG_SEGMENT cout << "best: " << box->ref_label << " " << best_score ; cout << "del/del2/begin:" << (box->del_right) << "/" << (box->del_left) << "/" << (box->start) << endl; cout << endl; #endif if (reverse_ref) // Why -1 here and +1 in dynprog.cpp /// best_i = m - best_i + 1 ; best_best_i = read.length() - best_best_i - 1 ; box->end = best_best_i ; }