/**
 * @brief Gather the probability of every category of the rate distribution.
 *
 * @return A vector holding, for each category index c in
 *         [0, getNumberOfCategories()), the value returned by
 *         getRateDistribution()->getProbability(c), in category order.
 */
inline Vdouble SubstitutionProcessCollectionMember::getClassProbabilities() const
{
  // The category count does not change while we iterate, so read it once.
  const size_t nbCategories = getRateDistribution()->getNumberOfCategories();

  Vdouble classProbs;
  for (size_t c = 0; c < nbCategories; ++c)
  {
    classProbs.push_back(getRateDistribution()->getProbability(c));
  }
  return classProbs;
}
void YNGKP_M8::updateMatrices() { AbstractBiblioSubstitutionModel::updateMatrices(); // homogeneization of the synonymous substittion rates Vdouble vd; for (unsigned int i = 0; i < pmixmodel_->getNumberOfModels(); i++) { vd.push_back(1 / pmixmodel_->getNModel(i)->Qij(synfrom_, synto_)); } pmixmodel_->setVRates(vd); }
void YNGKP_M3::updateMatrices() { for (unsigned int i = 0; i < lParPmodel_.size(); i++) { if (mapParNamesFromPmodel_.find(lParPmodel_[i].getName()) != mapParNamesFromPmodel_.end()) { if (lParPmodel_[i].getName()[18] == 'V') { unsigned int ind = TextTools::toInt(lParPmodel_[i].getName().substr(19)); double x = getParameterValue("omega0"); for (unsigned j = 1; j < ind; j++) { x += getParameterValue("delta" + TextTools::toString(j)); } lParPmodel_[i].setValue(x); } else { lParPmodel_[i].setValue(getParameter(getParameterNameWithoutNamespace(mapParNamesFromPmodel_[lParPmodel_[i].getName()])).getValue()); } } } pmixmodel_->matchParametersValues(lParPmodel_); // homogeneization of the synonymous substitution rates Vdouble vd; for (unsigned int i = 0; i < pmixmodel_->getNumberOfModels(); i++) { vd.push_back(1 / pmixmodel_->getNModel(i)->Qij(synfrom_, synto_)); } pmixmodel_->setVRates(vd); }
int main(int args, char** argv) { cout << "******************************************************************" << endl; cout << "* Bio++ Computation of site likelihoods inside mixed models *" << endl; cout << "* Version 2.2.0. *" << endl; cout << "* Author: L. Guéguen Last Modif.: 25/09/14 *" << endl; cout << "******************************************************************" << endl; cout << endl; if (args == 1) { help(); return 0; } try { BppApplication bppmixedlikelihoods(args, argv, "BppMixedLikelihoods"); bppmixedlikelihoods.startTimer(); Alphabet* alphabet = SequenceApplicationTools::getAlphabet(bppmixedlikelihoods.getParams(), "", false); auto_ptr<GeneticCode> gCode; CodonAlphabet* codonAlphabet = dynamic_cast<CodonAlphabet*>(alphabet); if (codonAlphabet) { string codeDesc = ApplicationTools::getStringParameter("genetic_code", bppmixedlikelihoods.getParams(), "Standard", "", true, true); ApplicationTools::displayResult("Genetic Code", codeDesc); gCode.reset(SequenceApplicationTools::getGeneticCode(codonAlphabet->getNucleicAlphabet(), codeDesc)); } // get the data VectorSiteContainer* allSites = SequenceApplicationTools::getSiteContainer(alphabet, bppmixedlikelihoods.getParams()); VectorSiteContainer* sites = SequenceApplicationTools::getSitesToAnalyse(*allSites, bppmixedlikelihoods.getParams(), "", true, false); delete allSites; ApplicationTools::displayResult("Number of sequences", TextTools::toString(sites->getNumberOfSequences())); ApplicationTools::displayResult("Number of sites", TextTools::toString(sites->getNumberOfSites())); // Get the tree Tree* tree = PhylogeneticsApplicationTools::getTree(bppmixedlikelihoods.getParams()); ApplicationTools::displayResult("Number of leaves", TextTools::toString(tree->getNumberOfLeaves())); AbstractDiscreteRatesAcrossSitesTreeLikelihood* tl; string nhOpt = ApplicationTools::getStringParameter("nonhomogeneous", bppmixedlikelihoods.getParams(), "no", "", true, false); ApplicationTools::displayResult("Heterogeneous 
model", nhOpt); MixedSubstitutionModel* model = 0; MixedSubstitutionModelSet* modelSet = 0; DiscreteDistribution* rDist = 0; if (nhOpt == "no") { model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams())); if (model == 0) { cout << "Model is not a Mixed model" << endl; exit(0); } SiteContainerTools::changeGapsToUnknownCharacters(*sites); if (model->getNumberOfStates() > model->getAlphabet()->getSize()) { // Markov-modulated Markov model! rDist = new ConstantRateDistribution(); } else { rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams()); } tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true); } else if (nhOpt == "one_per_branch") { model = dynamic_cast<MixedSubstitutionModel*>(PhylogeneticsApplicationTools::getSubstitutionModel(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams())); if (model == 0) { cout << "Model is not a Mixed model" << endl; exit(0); } SiteContainerTools::changeGapsToUnknownCharacters(*sites); if (model->getNumberOfStates() > model->getAlphabet()->getSize()) { // Markov-modulated Markov model! rDist = new ConstantRateDistribution(); } else { rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams()); } vector<double> rateFreqs; if (model->getNumberOfStates() != alphabet->getSize()) { // Markov-Modulated Markov Model... unsigned int n = (unsigned int)(model->getNumberOfStates() / alphabet->getSize()); rateFreqs = vector<double>(n, 1. / (double)n); // Equal rates assumed for now, may be changed later (actually, in the most general case, // we should assume a rate distribution for the root also!!! 
} std::map<std::string, std::string> aliasFreqNames; FrequenciesSet* rootFreqs = PhylogeneticsApplicationTools::getRootFrequenciesSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams(), aliasFreqNames, rateFreqs); vector<string> globalParameters = ApplicationTools::getVectorParameter<string>("nonhomogeneous_one_per_branch.shared_parameters", bppmixedlikelihoods.getParams(), ',', ""); modelSet = dynamic_cast<MixedSubstitutionModelSet*>(SubstitutionModelSetTools::createNonHomogeneousModelSet(model, rootFreqs, tree, aliasFreqNames, globalParameters)); model = 0; tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true); } else if (nhOpt == "general") { modelSet = dynamic_cast<MixedSubstitutionModelSet*>(PhylogeneticsApplicationTools::getSubstitutionModelSet(alphabet, gCode.get(), sites, bppmixedlikelihoods.getParams())); if (modelSet == 0) { cout << "Missing a Mixed model" << endl; exit(0); } SiteContainerTools::changeGapsToUnknownCharacters(*sites); if (modelSet->getNumberOfStates() > modelSet->getAlphabet()->getSize()) { // Markov-modulated Markov model! rDist = new ConstantDistribution(1.); } else { rDist = PhylogeneticsApplicationTools::getRateDistribution(bppmixedlikelihoods.getParams()); } tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, true); } else throw Exception("Unknown option for nonhomogeneous: " + nhOpt); tl->initialize(); double logL = tl->getValue(); if (isinf(logL)) { // This may be due to null branch lengths, leading to null likelihood! ApplicationTools::displayWarning("!!! Warning!!! Likelihood is zero."); ApplicationTools::displayWarning("!!! This may be due to branch length == 0."); ApplicationTools::displayWarning("!!! 
All null branch lengths will be set to 0.000001."); ParameterList pl = tl->getBranchLengthsParameters(); for (unsigned int i = 0; i < pl.size(); i++) { if (pl[i].getValue() < 0.000001) pl[i].setValue(0.000001); } tl->matchParametersValues(pl); logL = tl->getValue(); } if (isinf(logL)) { ApplicationTools::displayError("!!! Unexpected likelihood == 0."); ApplicationTools::displayError("!!! Looking at each site:"); for (unsigned int i = 0; i < sites->getNumberOfSites(); i++) { (*ApplicationTools::error << "Site " << sites->getSite(i).getPosition() << "\tlog likelihood = " << tl->getLogLikelihoodForASite(i)).endLine(); } ApplicationTools::displayError("!!! 0 values (inf in log) may be due to computer overflow, particularily if datasets are big (>~500 sequences)."); exit(-1); } // Write parameters to screen: ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15)); ParameterList parameters = tl->getSubstitutionModelParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } parameters = tl->getRateDistributionParameters(); for (unsigned int i = 0; i < parameters.size(); i++) { ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue())); } // ///////////////////////////////////////////// // Getting likelihoods per submodel string outputFile; outputFile = ApplicationTools::getAFilePath("output.likelihoods.file", bppmixedlikelihoods.getParams(), true, false); ApplicationTools::displayResult("Output file for likelihoods", outputFile); ofstream out(outputFile.c_str(), ios::out); size_t nSites = sites->getNumberOfSites(); size_t nummodel = ApplicationTools::getParameter<size_t>("likelihoods.model_number", bppmixedlikelihoods.getParams(), 1, "", true, true); string parname = ApplicationTools::getStringParameter("likelihoods.parameter_name", bppmixedlikelihoods.getParams(), "", "", true, 
false); if (modelSet && ((nummodel <= 0) || (nummodel > modelSet->getNumberOfModels()))) { ApplicationTools::displayError("Bad number of model " + TextTools::toString(nummodel) + "."); exit(-1); } MixedSubstitutionModel* p0 = dynamic_cast<MixedSubstitutionModel*>(model ? model : modelSet->getModel(nummodel - 1)); if (!p0) { ApplicationTools::displayError("Model " + TextTools::toString(nummodel) + " is not a Mixed Model."); exit(-1); } const AbstractBiblioMixedSubstitutionModel* ptmp = dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(p0); if (ptmp) { p0 = ptmp->getMixedModel().clone(); if (nhOpt == "no") model = p0; else { modelSet->replaceModel(nummodel-1, p0); modelSet->isFullySetUpFor(*tree); } } ////////////////////////////////////////////////// // Case of a MixtureOfSubstitutionModels MixtureOfSubstitutionModels* pMSM = dynamic_cast<MixtureOfSubstitutionModels*>(p0); if (pMSM) { vector<string> colNames; colNames.push_back("Sites"); size_t nummod = pMSM->getNumberOfModels(); for (unsigned int i = 0; i < nummod; i++) { colNames.push_back(pMSM->getNModel(i)->getName()); } DataTable* rates = new DataTable(nSites, colNames.size()); rates->setColumnNames(colNames); for (unsigned int i = 0; i < nSites; i++) { const Site* currentSite = &sites->getSite(i); int currentSitePosition = currentSite->getPosition(); (*rates)(i, "Sites") = string("[" + TextTools::toString(currentSitePosition) + "]"); } Vdouble vprob = pMSM->getProbabilities(); for (unsigned int i = 0; i < nummod; i++) { string modname = pMSM->getNModel(i)->getName(); for (unsigned int j = 0; j < nummod; j++) { pMSM->setNProbability(j, (j == i) ? 
1 : 0); } if (tl) delete tl; if (nhOpt == "no") tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true); else tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true); tl->initialize(); logL = tl->getValue(); Vdouble Vd = tl->getLogLikelihoodForEachSite(); for (unsigned int j = 0; j < nSites; j++) { (*rates)(j, modname) = TextTools::toString(Vd[j]); } ApplicationTools::displayMessage("\n"); ApplicationTools::displayMessage("Model " + modname + ":"); ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15)); ApplicationTools::displayResult("Probability", TextTools::toString(vprob[i], 15)); } DataTable::write(*rates, out, "\t"); } ////////////////////////////////////////////////// // Case of a MixtureOfASubstitutionModel else { MixtureOfASubstitutionModel* pMSM2 = dynamic_cast<MixtureOfASubstitutionModel*>(p0); if (pMSM2 != NULL) { size_t nummod = pMSM2->getNumberOfModels(); if (parname == "") { ParameterList pl=pMSM2->getParameters(); for (size_t i2 = 0; i2 < pl.size(); i2++) { string pl2n = pl[i2].getName(); if (dynamic_cast<const ConstantDistribution*>(pMSM2->getDistribution(pl2n))==NULL) { parname=pl2n; while (parname.size()>0 && pMSM2->getDistribution(parname)==NULL) parname=pl2n.substr(0,pl2n.rfind("_")); if (parname.size()>0){ ApplicationTools::displayResult("likelihoods.parameter_name", parname); break; } } } } if (parname == "") { ApplicationTools::displayError("Argument likelihoods.parameter_name is required."); exit(-1); } vector< Vint > vvnmod; size_t i2 = 0; while (i2 < nummod) { string par2 = parname + "_" + TextTools::toString(i2 + 1); Vint vnmod = pMSM2->getSubmodelNumbers(par2); if (vnmod.size() == 0) break; vvnmod.push_back(vnmod); i2++; } size_t nbcl = vvnmod.size(); if (nbcl==0) throw Exception("Parameter " + parname + " is not mixed."); Vdouble vprob = pMSM2->getProbabilities(); vector<vector<double> > vvprob; vector<double> vsprob; for (size_t i = 
0; i < nbcl; i++) { vector<double> vprob2; for (size_t j = 0; j < vvnmod[i].size(); j++) { vprob2.push_back(vprob[static_cast<size_t>(vvnmod[i][j])]); } vvprob.push_back(vprob2); vsprob.push_back(VectorTools::sum(vvprob[i])); } vector<string> colNames; colNames.push_back("Sites"); Vdouble dval; for (unsigned int i = 0; i < nbcl; i++) { SubstitutionModel* pSM = pMSM2->getNModel(static_cast<size_t>(vvnmod[i][0])); double valPar = pSM->getParameterValue(pSM->getParameterNameWithoutNamespace(parname)); dval.push_back(valPar); colNames.push_back("Ll_" + parname + "=" + TextTools::toString(valPar)); } for (unsigned int i = 0; i < nbcl; i++) colNames.push_back("Pr_" + parname + "=" + TextTools::toString(dval[i])); colNames.push_back("mean"); DataTable* rates = new DataTable(nSites, colNames.size()); rates->setColumnNames(colNames); for (unsigned int i = 0; i < nSites; i++) { const Site* currentSite = &sites->getSite(i); int currentSitePosition = currentSite->getPosition(); (*rates)(i,"Sites")=TextTools::toString(currentSitePosition); } VVdouble vvd; vector<double> vRates = pMSM2->getVRates(); for (size_t i = 0; i < nbcl; ++i) { string par2 = parname + "_" + TextTools::toString(i + 1); for (unsigned int j = 0; j < nummod; ++j) pMSM2->setNProbability(j, 0); for (size_t j = 0; j < vvprob[i].size(); ++j) pMSM2->setNProbability(static_cast<size_t>(vvnmod[i][j]), vvprob[i][j] / vsprob[i]); if (tl) delete tl; if (nhOpt == "no") tl = new RHomogeneousMixedTreeLikelihood(*tree, *sites, model, rDist, true, false, true); else tl = new RNonHomogeneousMixedTreeLikelihood(*tree, *sites, modelSet, rDist, false, true); tl->initialize(); logL = tl->getValue(); Vdouble vd = tl->getLogLikelihoodForEachSite(); for (unsigned int j = 0; j < nSites; j++) (*rates)(j, i + 1) = TextTools::toString(vd[j]); vvd.push_back(vd); ApplicationTools::displayMessage("\n"); ApplicationTools::displayMessage("Parameter " + par2 + "=" + TextTools::toString(dval[i]) + " with rate=" + 
TextTools::toString(vRates[i])); ApplicationTools::displayResult("Log likelihood", TextTools::toString(tl->getValue(), 15)); ApplicationTools::displayResult("Probability", TextTools::toString(vsprob[i], 15)); } for (unsigned int j = 0; j < nSites; j++) { Vdouble vd; for (unsigned int i = 0; i < nbcl; i++) vd.push_back(std::log(vsprob[i])+vvd[i][j]); VectorTools::logNorm(vd); for (unsigned int i = 0; i < nbcl; i++) (*rates)(j,nbcl + i + 1) = TextTools::toString(std::exp(vd[i])); (*rates)(j, 2 * nbcl + 1) = TextTools::toString(VectorTools::sumExp(vd, dval)); } DataTable::write(*rates, out, "\t"); } } delete alphabet; delete sites; if (model) delete model; if (modelSet) delete modelSet; delete rDist; delete tl; delete tree; ApplicationTools::displayMessage("\n"); bppmixedlikelihoods.done(); } catch (exception& e) { cout << e.what() << endl; return 1; } return 0; }
double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat) { //read Newick format tree tree treeIn(treeFile); //four states alphabet A C G T (will later be rplaced to 00,01,10,11) alphabet* alph = new nucleotide; sequenceContainer SC_all; //this will contain all positions //parameters: double PI_0 = 1-PI_1; double k = init_k; //will be increased with each iteration //parameters: int jump_size = total_positions / num_pos_with_same_k; for(int i=0; i<jump_size; i++) { Vdouble freqs; //stationary probabilities PI_00, PI_01, PI_10, PI_11 double TOTAL = k*PI_1*PI_1 + 2*PI_0*PI_1 + k*PI_0*PI_0; freqs.push_back(k*PI_0*PI_0 / TOTAL); //PI_00 = k*PI_0*PI_0 / TOTAL freqs.push_back(PI_0*PI_1 / TOTAL); //PI_01 = PI_0*PI_1 / TOTAL freqs.push_back(PI_0*PI_1 / TOTAL); //PI_10 = PI_0*PI_1 / TOTAL freqs.push_back(k*PI_1*PI_1 / TOTAL); //PI_11 = k*PI_1*PI_1 / TOTAL //Q matrix (partial values - the rest are calculated by gtrModel using freqs and these values) MDOUBLE a2c = PI_1; // --> c2a = freqs[a]*a2c/freqs[c] --> c2a = ((k*PI_0*PI_0 / TOTAL)*PI_1)/(PI_0*PI_1 / TOTAL) = k*PI_0 MDOUBLE a2g = PI_1; MDOUBLE a2t = 0; MDOUBLE c2g = 0; MDOUBLE c2t = k*PI_1; MDOUBLE g2t = k*PI_1; //starting the evolutionary model distribution *currDist = NULL; if(is_gamma == 1) { currDist = new generalGammaDistribution(alpha,beta,num_cat); // ---> in the future we might want to turn these into param } else { currDist = new uniDistribution; // no among site rate variation } replacementModel *probMod = NULL; pijAccelerator *pijAcc = NULL; probMod = new gtrModel(freqs,a2c,a2g,a2t,c2g,c2t,g2t); pijAcc = new trivialAccelerator(probMod); stochasticProcess* _sp = new stochasticProcess(currDist, pijAcc); //simulate: simulateTree st1(treeIn, *_sp, alph); st1.generate_seq(num_pos_with_same_k); //simulate num_pos_with_same_k positions with the current k if(i == 0) { SC_all = 
st1.toSeqDataWithoutInternalNodes(); //first time } else { sequenceContainer SC = st1.toSeqDataWithoutInternalNodes(); //concatenate new positions to the ones you have SC_all.concatenate(SC); } delete currDist; delete probMod; delete pijAcc; delete _sp; k = k + k_increase; //k = 1 , 1.05 , 1.1 , ... , 5.5 } //prepare out file name: std::stringstream sstm; if(is_gamma == 1) { sstm << treeFile << ".gammaRateNoInv.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas"; } else { sstm << treeFile << ".NoRate.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas"; } std::string seqOutputFile = sstm.str(); //write out: ofstream seq_sim(seqOutputFile.c_str()); fastaFormat::write(seq_sim,SC_all); seq_sim.close(); delete alph; return 0; }