bool TestAutocorrelatedBranchHeterogeneousGtrModel::run( void ) { // fix the rng seed std::vector<unsigned int> seed; seed.push_back(25); seed.push_back(42); GLOBAL_RNG->setSeed(seed); /* First, we read in the data */ // the matrix std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename); std::cout << "Read " << data.size() << " matrices." << std::endl; std::cout << data[0] << std::endl; std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename ); std::cout << "Read " << trees.size() << " trees." << std::endl; std::cout << trees[0]->getNewickRepresentation() << std::endl; /* set up the model graph */ ////////////////////// // first the priors // ////////////////////// // birth-death process priors StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) )); ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0)); ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0)); // gtr model priors ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) ); ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) ); //Root frequencies StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bf) ); StochasticNode<std::vector<double> > * er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) ; std::cout << "bf:\t" << bf->getValue() << std::endl; std::cout << "e:\t" << e->getValue() << std::endl; std::vector<std::string> names = data[0]->getTaxonNames(); ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) ); std::vector<RevBayesCore::Taxon> taxa; for (size_t i = 0; i < names.size(); ++i) { taxa.push_back( Taxon( names[i] ) ); } StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) ); tau->setValue( trees[0] ); std::cout << "tau:\t" << tau->getValue() << std::endl; std::vector<StochasticNode < std::vector<double> >* > pis; // std::vector<StochasticNode < std::vector<double> >* > ers; std::vector< const TypedDagNode < RateMatrix>* > qs; ConstantNode<double> *alpha_prior_shape = new ConstantNode< double >("alpha_prior_shape", new double( 5.0 ) ); ConstantNode<double> *alpha_prior_rate = new ConstantNode< double >("alpha_prior_rtae", new double( 0.5 ) ); StochasticNode<double> *alpha = new StochasticNode<double>("alpha", new GammaDistribution( alpha_prior_shape, alpha_prior_rate ) ); ConstantNode<double> *beta_prior_shape1 = new ConstantNode< double >("beta_prior_shape1", new double( 2.0 ) ); ConstantNode<double> *beta_prior_shape2 = new ConstantNode< double >("beta_prior_shape2", new double( 5.0 ) ); StochasticNode<double> *beta = new StochasticNode<double>("beta", new BetaDistribution( beta_prior_shape1, beta_prior_shape2 ) ); StochasticNode< RbVector<RateMatrix> > *perBranchQ = new StochasticNode< RbVector< RateMatrix > >( "autocorrBranchRate", new AutocorrelatedBranchMatrixDistribution( tau, beta, rf, er, alpha ) ); // StochasticNode< std::vector<RateMatrix> > *perBranchQ = new StochasticNode< std::vector< RateMatrix > >( "autocorrBranchRate", new DPP< RateMatrix >( tau, ... ) ); // // for (unsigned int i = 0 ; i < numBranches ; i++ ) { // std::ostringstream pi_name; // pi_name << "pi(" << i << ")"; // pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bf) ) ); // // ers.push_back(new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) ); // std::ostringstream q_name; // q_name << "q(" << i << ")"; // qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new GtrRateMatrixFunction(er, pis[i]) )); // std::cout << "Q:\t" << qs[i]->getValue() << std::endl; // } // and the character model GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters()); phyloCTMC->setRootFrequencies( rf ); phyloCTMC->setRateMatrix( perBranchQ ); StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC ); charactermodel->clamp( data[0] ); /* add the moves */ RbVector<Move> moves; moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) ); moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) ); moves.push_back( new NarrowExchange( tau, 10.0 ) ); moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) ); moves.push_back( new SubtreeScale( tau, 5.0 ) ); // moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) ); moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) ); moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) ); moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) ); moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) ); moves.push_back( new SimplexMove( rf, 10.0, 1, 0, true, 2.0 ) ); moves.push_back( new SimplexMove( rf, 100.0, 4, 0, true, 2.0 ) ); // for (unsigned int i = 0 ; i < numBranches ; i ++ ) { // // moves.push_back( new SimplexMove( ers[i], 10.0, 1, true, 2.0 ) ); // moves.push_back( new SimplexMove( pis[i], 10.0, 1, true, 2.0 ) ); // // moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) ); // moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) ); // } // add some tree stats to monitor DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) ); /* add the monitors */ RbVector<Monitor> monitors; std::set<DagNode*> monitoredNodes; // monitoredNodes.insert( er ); // monitoredNodes.insert( pi ); monitoredNodes.insert( div ); monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.log", "\t" ) ); std::set<DagNode*> monitoredNodes1; monitoredNodes1.insert( er ); /* for (unsigned int i = 0 ; i < numBranches ; i ++ ) { monitoredNodes1.insert( pis[i] ); }*/ monitoredNodes1.insert( rf ); monitoredNodes1.insert( treeHeight ); monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestAutocorrelatedBranchHeterogeneousGtrModelSubstRates.log", "\t" ) ); monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) ); std::set<DagNode*> monitoredNodes2; monitoredNodes2.insert( tau ); monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.tree", "\t", false, false, false ) ); /* instantiate the model */ Model myModel = Model( tau ); std::vector<DagNode*> &nodes = myModel.getDagNodes(); for (std::vector<DagNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) { std::cerr << (*it)->getName() << std::endl; } /* instiate and run the MCMC */ Mcmc myMcmc = Mcmc( myModel, moves, monitors ); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); /* clean up */ // for (size_t i = 0; i < 10; ++i) { // delete x[i]; // } // delete [] x; delete div; // delete sigma; // delete a; // delete b; // delete c; std::cout << "Finished Autocorrelated Branch Heterogeneous GTR model test." << std::endl; return true; }
bool TestBranchHeterogeneousHkyModel::run( void ) { /* First, we read in the data */ // the matrix std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename); std::cout << "Read " << data.size() << " matrices." << std::endl; std::cout << data[0] << std::endl; /* set up the model graph */ ////////////////////// // first the priors // ////////////////////// // birth-death process priors StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) )); ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0)); ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0)); // hky model priors ConstantNode<std::vector<double> > *bfPrior = new ConstantNode<std::vector<double> >( "bfPrior", new std::vector<double>(4,1.0) ); ConstantNode< double > *tstvPrior = new ConstantNode< double >( "tstvPrior", new double(1.0) ); // root frequencies StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bfPrior) ); // // first the hyper-priors of the clock model ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) ); ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) ); // // // then the parameters ContinuousStochasticNode *expectLN = new ContinuousStochasticNode( "UCLN.expectation", new ExponentialDistribution(a) ); // the expectation of the LN dist so mu = log(expectLN) - (sigLN^2)/2 ContinuousStochasticNode *sigLN = new ContinuousStochasticNode("UCLN.variance", new ExponentialDistribution(b) ); DeterministicNode<double> *logExpLN = new DeterministicNode<double>("logUCLN.exp", new LnFunction(expectLN) ); DeterministicNode<double> *squareSigLN = new DeterministicNode<double>("squareSigLN", new BinaryMultiplication<double, double, double>(sigLN, sigLN) ); DeterministicNode<double> *divSqSigLN = new DeterministicNode<double>("divSqSigLN", new BinaryDivision<double, double, double>(squareSigLN, new ConstantNode<double>( "2", new double (2.0))) ); DeterministicNode<double> *muValLN = new DeterministicNode<double>("MuValLN", new BinarySubtraction<double, double, double>(logExpLN, divSqSigLN) ); //Declaring a vector of HKY matrices size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2; std::vector<StochasticNode < std::vector<double> >* > pis; std::vector< const TypedDagNode< RateMatrix >* > qs; StochasticNode < double >* tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution( tstvPrior ) ); // // // declaring a vector of clock rates std::vector<const TypedDagNode<double> *> branchRates; std::vector< ContinuousStochasticNode *> branchRates_nonConst; for (unsigned int i = 0 ; i < numBranches ; i++ ) { // construct the per branch rate matrix std::ostringstream pi_name; pi_name << "pi(" << i << ")"; pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bfPrior) ) ); std::ostringstream q_name; q_name << "q(" << i << ")"; qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new HkyRateMatrixFunction( tstv, pis[i]) )); std::cout << "Q:\t" << qs[i]->getValue() << std::endl; // construct the per branch clock rate std::ostringstream br_name; br_name << "br(" << i << ")"; ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(muValLN, sigLN, new ConstantNode<double>("offset", new double(0.0) ))); branchRates.push_back( tmp_branch_rate ); branchRates_nonConst.push_back( tmp_branch_rate ); } // build the vector containing all rates/rate-matrices // instead of independent rates/rate-matrices we could have used anything that specifies a distribution on a set of values // e.g. a mixture, DPP or an autocorrelated model DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) ); DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) ); // create the variables for the rate variation across sites // we use the standard 4 categorical gamma rate variation // though, any other rates could be used too as long as they are normalized ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) ); ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) ); ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) ); DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) ); ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) ); DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) ); ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) ); DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) ); ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) ); DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) ); std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >(); gamma_rates.push_back(q1_value); gamma_rates.push_back(q2_value); gamma_rates.push_back(q3_value); gamma_rates.push_back(q4_value); DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) ); DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) ); // we actually do not use different probabilities per rate (yet!) // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) ); // create the stochastic node for the tree // we use a birth-death process prior and thus a time-tree // we could use as well an unrooted tree std::vector<std::string> names = data[0]->getTaxonNames(); ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( 2.0 ) ); std::vector<RevBayesCore::Taxon> taxa; for (size_t i = 0; i < names.size(); ++i) { taxa.push_back( Taxon( names[i] ) ); } StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) ); // //rescale the tree so that its root age is 1 TimeTree *t = tau->getValue().clone(); const TopologyNode &root = t->getRoot(); TreeUtilities::rescaleTree(t, &t->getRoot(), 1.0 / root.getAge()); // tau->setValue( t ); std::cout << "tau:\t" << tau->getValue() << std::endl; // and the character model //GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, true, data[0]->getNumberOfCharacters() ); GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters() ); // set the branch heterogeneous substitution matrices // if you set instead of a vector a single matrix, then you get a homogeneous model charModel->setRateMatrix( qs_node ); charModel->setRootFrequencies( rf ); // set the per branch clock rates // if you instead specify a single rate, you get a strict clock model charModel->setClockRate( br_vector ); // specify the rate variation across sites // if you skip this then you get the model without rate variation across sites. charModel->setSiteRates( site_rates_norm ); StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", charModel ); charactermodel->clamp( data[0] ); /* add the moves */ RbVector<Move> moves; moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) ); moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) ); moves.push_back( new NarrowExchange( tau, 10.0 ) ); moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) ); moves.push_back( new SubtreeScale( tau, 5.0 ) ); //Fixintg the root age at 1: // moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) ); // moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) ); //test: only 20 instead of 30 moves.push_back( new NodeTimeSlideUniform( tau, 20.0 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) ); moves.push_back( new SimplexSingleElementScale( rf, 10.0, true, 2.0 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 2, true ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(expectLN, 1.0), 2, true ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(sigLN, 1.0), 2, true ) ); std::vector<StochasticNode<double> * > rates; for (unsigned int i = 0 ; i < numBranches ; i ++ ) { rates.push_back( branchRates_nonConst[i] ); } moves.push_back( new RateAgeBetaShift( tau, rates, 1.0, true, 10.0) ); //!< constructor for (unsigned int i = 0 ; i < numBranches ; i ++ ) { moves.push_back( new SimplexSingleElementScale( pis[i], 10.0, true, 2.0 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), 1, true ) ); } // add some tree stats to monitor DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) ); /* add the monitors */ RbVector<Monitor> monitors; std::set<DagNode*> monitoredNodes; monitoredNodes.insert( tstv ); monitoredNodes.insert( treeHeight ); monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) ); std::set<DagNode*> monitoredNodes2; monitoredNodes2.insert( tau ); monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestBranchHeterogeneousHkyModel.tree", "\t", false, false, false ) ); /* instantiate the model */ Model myModel = Model(qs[0]); monitors.push_back( new ModelMonitor( 10, "TestBranchHeterogeneousHkyModel.log", "\t" ) ); /* instiate and run the MCMC */ Mcmc myMcmc = Mcmc( myModel, moves, monitors ); // myMcmc.burnin(1000, 100); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); /* clean up */ // for (size_t i = 0; i < 10; ++i) { // delete x[i]; // } // delete [] x; delete div; // delete sigma; // delete a; // delete b; // delete c; std::cout << "Finished GTR model test." << std::endl; return true; }
bool TestAdmixtureGraph::run(void) { std::cout << "Running TestAdmixtureGraph\n"; std::cout << "argc: " << argc << "\n"; if (argc > 1) { for (int i = 0; i < argc; i++) { argTokens.push_back(argv[i]); std::cout << i << " " << argTokens[i] << "\n"; } std::cout << argc << " == " << argTokens.size() << " arguments\n"; snpFilename = argTokens[1]; } // MODEL GRAPH std::vector<unsigned int> seed; seed = GLOBAL_RNG->getSeed(); std::cout << "seed " << seed[0] << " " << seed[1] << "\n"; //seed.clear(); seed.push_back(53866); seed.push_back(21201); GLOBAL_RNG->setSeed(seed); // 53866 21201 // read in data std::string fn = snpFilepath + snpFilename; int snpThinBy = 100; SnpData* snps = PopulationDataReader().readSnpData(fn,snpThinBy); // read in tree std::vector<AdmixtureTree*> trees; bool startTree = false; //treeFilename = ""; if (treeFilename != "") { // NclReader does not seem to work for Newick strings at this time /* trees = NclReader::getInstance().readAdmixtureTrees( snpFilepath + treeFilename, "newick" ); std::cout << "Read " << trees.size() << " trees." << std::endl; std::cout << trees[0]->getNewickRepresentation() << std::endl; */ // hacky workaround for now... //std::string newickStr = "(San:1,((Han:0.348958,Dai:0.348958):0.194964,(((Ket:0.351687,(Koryak:0.344761,(SiberianEskimo:0.325112,(((Huichol:0.269256,(Pima:0.233362,((Karitiana:0.104362,Aymara:0.104362):0.0120051,(Yukpa:0.100877,Mayan:0.100877):0.0154902):0.116995):0.0358943):0.0341607,Athabascan:0.303417):0.0128117,((EastGreenland:0.158699,WestGreenland:0.158699):0.0369703,Aleuts:0.195669):0.120559):0.00888335):0.0196487):0.0069259):0.0987521,((Nivhks:0.278599,Buryat:0.278599):0.0642882,Yakut:0.342887):0.107551):0.0756044,Altai:0.526043):0.0178791):0.456078)"; //std::string newickStr = "((A:.5,B:.5):.5,C:.5)"; std::string newickStr = "(San:1,((Koryak:0.926938,(SiberianEskimo:0.812519,((Aleuts:0.762302,(EastGreenland:0.4824,WestGreenland:0.4824):0.279902):0.0144746,((Huichol:0.353522,(Pima:0.324226,((Mayan:0.223067,Yukpa:0.223067):0.0856916,(Aymara:0.173581,Karitiana:0.173581):0.135178):0.015467):0.0292956):0.109212,Athabascan:0.462734):0.314043):0.0357419):0.114419):0.0600046,(Ket:0.772384,((Altai:0.640188,(Han:0.481097,Dai:0.481097):0.159091):0.0445788,((Buryat:0.596074,Nivhks:0.596074):0.015206,Yakut:0.61128):0.0734876):0.0876171):0.214558):0.0130575);"; NewickConverter nc; BranchLengthTree* blt = nc.convertFromNewick(newickStr); AdmixtureTree* at = TreeUtilities::convertToAdmixtureTree(*blt, snps->getPopulationNames()); at->setNames(snps->getPopulationNames()); at->updateTipOrderByNames(snps->getPopulationNames()); trees.push_back(at); startTree = true; } size_t numTaxa = snps->getNumPopulations(); size_t numNodes = 2 * numTaxa - 1; size_t numBranches = numNodes - 1; //size_t numSites = snps->getNumSnps(); int blockSize = 5000; double divGens = 1;//.01; int delay = 1000; int numTreeResults = 500; int numAdmixtureResults = 500; int maxNumberOfAdmixtureEvents = 1; double residualWeight = 2.0; bool useWishart = true; // if false, the composite likelihood function is used bool useBias = true; // if false, no covariance bias correction for small sample size is used bool useAdmixtureEdges = true; // if false, no admixture moves or edges are used bool useBranchRates = true; // if false, all populations are of the same size bool allowSisterAdmixture = true; // if false, admixture events cannot be between internal lineages who share a divergence parent bool discardNonPosDefMtx = true; // if false, round negative eigenvalues to positive eps bool useContrasts = false; // nothing really, need to remove bool updateParameters = true; bool updateTopology = true; bool updateNodeAges = true; bool useParallelMcmcmc = true; int numChains = 4; int numProcesses = numChains; // numProcesses=80; int swapInterval = 1; double deltaTemp = .1; double sigmaTemp = 1.0; double hottestTemp = 0.001; if (!true) { deltaTemp = exp(-log(hottestTemp)/pow(numChains-1,sigmaTemp)) - 1; std::cout << deltaTemp << "\n"; } double startingHeat = 1.0; double likelihoodScaler = 1.0; std::stringstream rndStr; rndStr << std::setw(9) << std::fixed << std::setprecision(0) << std::setfill('0') << std::floor(GLOBAL_RNG->uniform01()*1e9); // std::string outName = "papa." + rndStr.str(); std::string simName = "hgdp"; std::string outName = simName + "." + rndStr.str(); // std::string outName = simName + "." + rndStr.str() + "." + snpFilename; // BM diffusion rate ConstantNode<double>* a_bm = new ConstantNode<double>( "bm_a", new double(3)); ConstantNode<double>* b_bm = new ConstantNode<double>( "bm_b", new double(100)); //ConstantNode<double>* c_bm = new ConstantNode<double>( "bm_c", new double(0)); //ConstantNode<double>* d_bm = new ConstantNode<double>( "bm_d", new double(100)); StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new ExponentialDistribution(b_bm)); //StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new UniformDistribution(c_bm, d_bm)); // CPP rate // MJL 071713: Flat Poisson prior cannot overpower model overfitting when lnL is large. // Consider implementing Conway-Maxewell-Poisson distn instead. // This prior requires admixture events to improve lnL by N units // Negative values -> admixture rare // Positive values -> admixture common (set admixture cap) double adm_th_lnL = 10; double rate_cpp_prior = exp(adm_th_lnL); ConstantNode<double>* c = new ConstantNode<double>( "c", new double(1.0/rate_cpp_prior)); // admixture rate prior StochasticNode<double>* admixtureRate = new StochasticNode<double> ("rate_CPP", new ExponentialDistribution(c)); StochasticNode<int>* admixtureCount = new StochasticNode<int> ("count_CPP", new PoissonDistribution(admixtureRate)); admixtureCount->clamp(new int(0)); admixtureRate->clamp(new double(rate_cpp_prior)); if (!useAdmixtureEdges) { admixtureRate->clamp(new double(rate_cpp_prior)); admixtureCount->clamp(new int(0)); } // birth-death process for ultrametric tree StochasticNode<double>* diversificationRate = new StochasticNode<double>("div", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(50.0)) )); StochasticNode<double>* turnover = new StochasticNode<double>("turnover", new UniformDistribution(new ConstantNode<double>("do_lower", new double(0.0)), new ConstantNode<double>("do_upper", new double(1.0)) )); // tree node StochasticNode<AdmixtureTree>* tau = new StochasticNode<AdmixtureTree>( "tau", new AdmixtureConstantBirthDeathProcess(diversificationRate, turnover, (int)numTaxa, snps->getPopulationNames(), snps->getOutgroup()) ); if (startTree) { tau->setValue(new AdmixtureTree(*trees[0])); tau->setIgnoreRedraw(true); } // branch multipliers (mutation rate is clocklike, but population sizes are not) std::vector<const TypedDagNode<double> *> branchRates; std::vector< ContinuousStochasticNode *> branchRates_nonConst; ConstantNode<double>* branchRateA = new ConstantNode<double>( "branchRateA", new double(1)); ConstantNode<double>* branchRateB = new ConstantNode<double>( "branchRateB", new double(2)); ConstantNode<double>* branchRateC = new ConstantNode<double>( "branchRateC", new double(0)); ConstantNode<double>* branchRateD = new ConstantNode<double>( "branchRateD", new double(.01)); //ConstantNode<double>* branchRateE = new ConstantNode<double>( "branchRateE", new double(10)); for( size_t i=0; i<numBranches; i++){ std::ostringstream br_name; br_name << "br_" << i; //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new ExponentialDistribution(branchRateD) ); //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new InverseGammaDistribution(branchRateA, branchRateB)); //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new LognormalDistribution(branchRateC, branchRateA)); //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new UniformDistribution(branchRateC, branchRateE)); // ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateD)); ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateB)); if (!useBranchRates) { tmp_branch_rate->clamp(new double(1.0)); } branchRates.push_back( tmp_branch_rate ); branchRates_nonConst.push_back( tmp_branch_rate ); } DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) ); // model node BrownianMotionAdmixtureGraph* bmag = new BrownianMotionAdmixtureGraph( tau, diffusionRate, admixtureRate, br_vector, snps, useWishart, useContrasts, useBias, discardNonPosDefMtx, blockSize, likelihoodScaler ); StochasticNode<ContinuousCharacterData >* admixtureModel; admixtureModel = new StochasticNode<ContinuousCharacterData >("AdmixtureGraph", bmag); // have to clamp to distinguish likelihood from prior (incidentally calls setValue(), but this is handled otherwise) admixtureModel->clamp( new ContinuousCharacterData() ); // does it event matter how it's clamped? // residuals DeterministicNode<std::vector<double> >* residuals = new DeterministicNode<std::vector<double> >("residuals", new BrownianMotionAdmixtureGraphResiduals(admixtureModel)); // MOVES std::cout << "Adding moves\n"; // moves vector RbVector<Move> moves; // model parameters if (updateParameters) { moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diffusionRate, 0.1), 5, false ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diversificationRate, 0.5), 5, false ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turnover, 0.5), 5, false ) ); } // non-admixture tree updates if (updateTopology) { moves.push_back( new AdmixtureNarrowExchange( tau, 0.1, numTaxa/2) ); moves.push_back( new AdmixtureSubtreePruneRegraft( tau, 0.1, numTaxa/4) ); moves.push_back( new AdmixtureFixedNodeheightPruneRegraft(tau, numTaxa/4)); moves.push_back( new AdmixtureEdgeReplaceNNI( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa)); moves.push_back( new AdmixtureEdgeReplaceFNPR( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa)); moves.push_back( new AdmixtureEdgeReplaceSubtreeRegraft( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa)); } if (updateNodeAges) { for (size_t i = numTaxa; i < numNodes - 1; i++) { moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 15.0, false, 1.0 ) ); moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 1.0, false, 0.5 ) ); } } // branch rate updates if (useBranchRates) { // branch rate multipliers for( size_t i=0; i < numBranches; i++) { moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 0.1), 1, false ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), .5, false ) ); } // tree rate shift moves.push_back( new AdmixtureShiftTreeRates(diffusionRate, branchRates_nonConst, 0.5, false, 2.0)); // shift node age for branch rate for (size_t i = numTaxa; i < numNodes - 1; i++) { if (updateNodeAges) moves.push_back( new AdmixtureShiftNodeAgeAndRate(tau, branchRates_nonConst, (int)i, 0.7, false, 1.0) ); // MJL 081513: not working, I think... if (updateTopology) { std::vector<DagNode*> pvec; pvec.push_back(tau); pvec.push_back(branchRates_nonConst[i]); //moves.push_back( new AdmixtureSubtreePruneRegraftAndRateShift(pvec, i, 0.5, 1.0) ); // ... something wrong with how the lnProb is computed using the lnProb ratios... } } // NNI with branch rate modifier (not working quite right, disabled) if (updateTopology) moves.push_back( new AdmixtureNearestNeighborInterchangeAndRateShift( tau, branchRates_nonConst, 0.1, false, numTaxa)); } // admixture tree updates if (useAdmixtureEdges) { moves.push_back( new AdmixtureEdgeAddResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 10.0) ); moves.push_back( new AdmixtureEdgeRemoveResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 10.0) ); moves.push_back( new AdmixtureEdgeReplaceResidualWeights( tau, admixtureRate, branchRates_nonConst, residuals, residualWeight, delay, allowSisterAdmixture, 20.0) ); //moves.push_back( new AdmixtureEdgeMultiRemove( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 2.0 ) ); //moves.push_back( new AdmixtureReplaceAndNNI( tau, 0.5, 10.0) ); //moves.push_back( new AdmixtureEdgeAddCladeResiduals( tau, admixtureRate, admixtureCount, residuals, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 2.0) ); //moves.push_back( new AdmixtureEdgeReplaceCladeResiduals( tau, admixtureRate, branchRates_nonConst, residuals, delay, allowSisterAdmixture, 15.0) ); if (updateTopology) { moves.push_back( new AdmixtureEdgeDivergenceMerge( tau, admixtureRate, branchRates_nonConst, admixtureCount, residuals, delay, allowSisterAdmixture, 5.0 )); moves.push_back( new AdmixtureEdgeRegraftReplace( tau, residuals, 1.0, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 5.0)); ; } moves.push_back( new AdmixtureEdgeReweight( tau, delay, 10.0, 10.0) ); moves.push_back( new AdmixtureEdgeReversePolarity( tau, delay, 2.0, 10.0) ); moves.push_back( new AdmixtureEdgeSlide( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) ); moves.push_back( new AdmixtureEdgeFNPR( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(admixtureRate, 0.1), 5, false ) ); } // MONITORS std::cout << "Adding monitors\n"; RbVector<Monitor> monitors; // parameter monitor std::vector<DagNode*> monitoredNodes; monitoredNodes.push_back( diffusionRate ); monitoredNodes.push_back( admixtureRate ); monitoredNodes.push_back( diversificationRate ); monitoredNodes.push_back( turnover ); monitoredNodes.push_back( admixtureCount ); if (useBranchRates) { for( size_t i=0; i<numBranches; i++){ monitoredNodes.push_back( branchRates_nonConst[i] ); } } monitors.push_back( new FileMonitor( monitoredNodes, 1, "/Users/mlandis/data/admix/output/" + outName + ".parameters.txt", "\t", true, true, true, useParallelMcmcmc, useParallelMcmcmc, useParallelMcmcmc ) ); monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) ); monitors.push_back( new AdmixtureBipartitionMonitor(tau, diffusionRate, br_vector, numTreeResults, numAdmixtureResults, 1, "/Users/mlandis/data/admix/output/" + outName + ".bipartitions.txt", "\t", true, true, true, true, true, true ) ); monitors.push_back( new AdmixtureResidualsMonitor(residuals, snps->getPopulationNames(), 10, "/Users/mlandis/data/admix/output/" + outName + ".residuals.txt", "\t", true, true, true, true ) ); //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, true, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".admixture_trees.txt", "\t", true, true, true, true ) ); //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".topology_trees.trees", "\t", true, true, true, true ) ); monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, false, 10, "/Users/mlandis/data/admix/output/" + outName + ".time_trees.trees", "\t", true, true, true, true ) ); // MODEL std::cout << "Calling model\n"; std::set<const DagNode*> mset; mset.insert(admixtureRate); Model myModel = Model(mset); // MCMC std::cout << "Calling mcmc\n"; if (!useParallelMcmcmc) { Mcmc myMcmc = Mcmc(myModel, moves, monitors); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); } else { ParallelMcmcmc myPmc3(myModel, moves, monitors, "random", numChains, numProcesses, swapInterval, deltaTemp, sigmaTemp, startingHeat); myPmc3.run(mcmcGenerations/divGens); myPmc3.printOperatorSummary(); } std::cout << "All done!\n"; // OBJECT CLEANUP delete snps; delete a_bm; delete b_bm; delete c; delete tau; delete diversificationRate; delete turnover; delete diffusionRate; delete admixtureRate; delete admixtureCount; delete branchRateA; delete branchRateB; delete branchRateC; delete branchRateD; branchRates_nonConst.clear(); branchRates.clear(); delete br_vector; //delete bmag; // malloc deallocation error delete admixtureModel; delete residuals; moves.clear(); monitors.clear(); return true; }
bool TestUCLNRelaxedClockBHT92Model::run( void ) { std::vector<unsigned int> seeds; seeds.push_back(7); seeds.push_back(4); GLOBAL_RNG->setSeed( seeds ); /* First, we read in the data */ // the matrix std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename); std::cout << "Read " << data.size() << " matrices." << std::endl; std::cout << data[0] << std::endl; std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename ); std::cout << "Read " << trees.size() << " trees." << std::endl; std::cout << trees[0]->getNewickRepresentation() << std::endl; /* set up the model graph */ ////////////////////// // first the priors // ////////////////////// // birth-death process priors StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) )); ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0)); ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0)); // Setting up the substitution model // //ts/tv ratio: ConstantNode<double > *tstv_prior = new ConstantNode<double >( "tstv_prior", new double(0.25) ); ContinuousStochasticNode *tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution(tstv_prior) ); //GC content prior: ConstantNode<double > *eq_gc_prior = new ConstantNode<double >( "eq_gc_prior_ab", new double(1.0) ); //Root GC frequency StochasticNode< double > *omega = new StochasticNode< double >( "omega", new BetaDistribution(eq_gc_prior,eq_gc_prior) ); DeterministicNode<std::vector<double> > *rf = new DeterministicNode< std::vector<double> >( "rf", new NucleotideFrequenciesFromGcContentFunction( omega ) ); std::cout << "omega:\t" << omega->getValue() << std::endl; std::cout << "rf:\t" << rf->getValue() << std::endl; std::cout << "tstv:\t" << tstv->getValue() << std::endl; //Declaring a vector of matrices, one per branch size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2; std::vector<ContinuousStochasticNode*> thetas; std::vector< const TypedDagNode < RateMatrix >* > qs; //Equilibrium GC frequency: one per branch, defined in the loop along with the T92 rate matrices. for (unsigned int i = 0 ; i < numBranches ; i++ ) { std::ostringstream eq_gc_name; eq_gc_name << "eq_gc(" << i << ")"; thetas.push_back(new ContinuousStochasticNode( eq_gc_name.str(), new BetaDistribution(eq_gc_prior,eq_gc_prior) ) ); std::ostringstream q_name; q_name << "q(" << i << ")"; qs.push_back(new DeterministicNode< RateMatrix >( q_name.str(), new Tamura92RateMatrixFunction( thetas[i], tstv) )); //std::cout << "Matrix Q:\t"<<i<<"\t" << qs[i]->getValue() << std::endl; } //Build a node out of the vector of nodes DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) ); // Setting up the relaxed clock model // ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) ); ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) ); std::vector<const TypedDagNode<double> *> branchRates; std::vector< ContinuousStochasticNode *> branchRates_nonConst; for( size_t i=0; i<numBranches; i++){ std::ostringstream br_name; br_name << "br(" << i << ")"; ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(a, b, new ConstantNode<double>("offset", new double(0.0) ))); branchRates.push_back( tmp_branch_rate ); branchRates_nonConst.push_back( tmp_branch_rate ); } //Build a node out of the vector of nodes DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) ); // Putting it all together // std::vector<std::string> names = data[0]->getTaxonNames(); ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) ); StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, div, turn, rho, "uniform", "survival", int(names.size()), names, std::vector<Clade>()) ); //If we want to get a good starting tree // tau->setValue( trees[0] ); std::cout << "tau:\t" << tau->getValue() << std::endl; // and the character model // StochasticNode<CharacterData<DnaState> > *charactermodel = new StochasticNode<CharacterData <DnaState> >("S", new SimpleGTRBranchRateTimeCharEvoModel<DnaState, TimeTree>(tau, q, br_vector, true, data[0]->getNumberOfCharacters()) ); GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters()); phyloCTMC->setRootFrequencies( rf ); phyloCTMC->setRateMatrix( qs_node ); phyloCTMC->setClockRate( br_vector ); StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC ); charactermodel->clamp( data[0] ); /* add the moves */ RbVector<Move> moves; moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) ); moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) ); moves.push_back( new NarrowExchange( tau, 10.0 ) ); moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) ); moves.push_back( new SubtreeScale( tau, 5.0 ) ); moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) ); moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) ); moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) ); moves.push_back( new BetaSimplexMove( omega, 10.0, true, 2.0 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) ); for (unsigned int i = 0 ; i < numBranches ; i ++ ) { moves.push_back( new BetaSimplexMove( dynamic_cast<StochasticNode<double>* >(thetas[i]), 10.0, true, 2.0 ) ); moves.push_back( new SlidingMove( thetas[i], 0.05, true, 2.0) ); // moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) ); // moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) ); } // add some tree stats to monitor DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) ); /* add the monitors */ RbVector<Monitor> monitors; std::set<DagNode*> monitoredNodes; // monitoredNodes.insert( er ); // monitoredNodes.insert( pi ); monitoredNodes.insert( div ); monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestUCLNRelaxedClockBHT92Model.log", "\t" ) ); std::set<DagNode*> monitoredNodes1; // monitoredNodes1.insert( er ); for (unsigned int i = 0 ; i < numBranches ; i ++ ) { monitoredNodes1.insert( thetas[i] ); } monitoredNodes1.insert( rf ); monitoredNodes1.insert( treeHeight ); monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestUCLNRelaxedClockBHT92ModelSubstRates.log", "\t" ) ); monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) ); std::set<DagNode*> monitoredNodes2; monitoredNodes2.insert( tau ); monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestUCLNRelaxedClockBHT92Model.tree", "\t", false, false, false ) ); /* instantiate the model */ Model myModel = Model(qs[0]); /* instiate and run the MCMC */ Mcmc myMcmc = Mcmc( myModel, moves, monitors ); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); /* clean up */ // for (size_t i = 0; i < 10; ++i) { // delete x[i]; // } // delete [] x; delete div; // delete sigma; // delete a; // delete b; // delete c; std::cout << "Finished GTR model test." << std::endl; return true; }
bool TestACLNRatesGen::run( void ) { // alignmentFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_GTRG.nex"; // treeFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_true_relx.tre"; std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename); std::cout << "Read " << data.size() << " matrices." << std::endl; std::cout << data[0] << std::endl; // First, we read in the data std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename ); std::cout << "Read " << trees.size() << " trees." << std::endl; std::cout << trees[0]->getNewickRepresentation() << std::endl; // ####################################### // ###### birth-death process priors ##### // ####################################### // Constant nodes ConstantNode<double> *dLambda = new ConstantNode<double>("div_rate", new double(1.0 / 5.0)); // Exponential rate for prior on div ConstantNode<double> *turnA = new ConstantNode<double>("turn_alpha", new double(2.0)); // Beta distribution alpha ConstantNode<double> *turnB = new ConstantNode<double>("turn_beta", new double(2.0)); // Beta distribution beta ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0)); // assume 100% sampling for now ConstantNode<double> *meanOT = new ConstantNode<double>("meanOT", new double(trees[0]->getRoot().getAge()*1.5)); ConstantNode<double> *stdOT = new ConstantNode<double>("stdOT", new double(10.0)); // Stochastic nodes StochasticNode<double> *origin = new StochasticNode<double>( "origin", new NormalDistribution(meanOT, stdOT) ); StochasticNode<double> *div = new StochasticNode<double>("diversification", new ExponentialDistribution(dLambda)); StochasticNode<double> *turn = new StochasticNode<double>("turnover", new BetaDistribution(turnA, turnB)); // Deterministic nodes // birthRate = div / (1 - turn) DeterministicNode<double> *birthRate = new DeterministicNode<double>("birth_rate", new BirthRateConstBDStatistic(div, turn)); // deathRate = (div * turn) / ( 1 - turn) DeterministicNode<double> *deathRate = new DeterministicNode<double>("death_rate", new DeathRateConstBDStatistic(div, turn)); // For some datasets with large root ages, if div>1.0 (or so), the probability is NaN RandomNumberGenerator* rng = GLOBAL_RNG; div->setValue(rng->uniform01() / 1.5); // Birth-death tree std::vector<std::string> names = data[0]->getTaxonNames(); std::vector<RevBayesCore::Taxon> taxa; for (size_t i = 0; i < names.size(); ++i) { taxa.push_back( Taxon( names[i] ) ); } StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, birthRate, deathRate, rho, "uniform", "nTaxa", taxa, std::vector<Clade>()) ); DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) ); // ############################################## // #### ACLN Model on Branch Rates ##### // ############################################## size_t numBranches = 2 * data[0]->getNumberOfTaxa() - 2; size_t numNodes = numBranches + 1; // model rates at nodes ConstantNode<double> *a = new ConstantNode<double>("a", new double(4.0) ); ConstantNode<double> *b = new ConstantNode<double>("b", new double(4.0) ); ConstantNode<double> *anu = new ConstantNode<double>("a_nu", new double(1.0) ); ConstantNode<double> *bnu = new ConstantNode<double>("b_nu", new double(8.0) ); StochasticNode<double> *rootRate = new StochasticNode<double>("root.rate", new GammaDistribution(a, b)); StochasticNode<double> *bmNu = new StochasticNode<double>("BM_var", new GammaDistribution(anu, bnu)); size_t rootID = trees[0]->getRoot().getIndex(); ConstantNode<double> *crInv = new ConstantNode<double>("invCr", new double(1.0) ); DeterministicNode<double> *scaleRate = new DeterministicNode<double>("scaleRate", new BinaryDivision<double, double, double>(crInv, treeHeight)); StochasticNode< std::vector< double > > *nodeRates = new StochasticNode< std::vector< double > >( "NodeRates", new AutocorrelatedLognormalRateDistribution(tau, bmNu, rootRate, scaleRate) ); std::cout << nodeRates->getValue().size() << std::endl; std::vector<const TypedDagNode<double> *> branchRates; for( size_t i=0; i<numBranches; i++){ std::ostringstream brName; brName << "br(" << i << ")"; DeterministicNode<double> *tmpBrRt = new DeterministicNode<double>(brName.str(), new RateOnBranchAve(nodeRates, tau, scaleRate, i)); branchRates.push_back( tmpBrRt ); } DeterministicNode< std::vector< double > >* brVector = new DeterministicNode< std::vector< double > >( "branchRates", new VectorFunction< double >( branchRates ) ); // making a combined DagNode for a compound move std::vector<DagNode*> treeAndRates; treeAndRates.push_back( tau ); treeAndRates.push_back(nodeRates); treeAndRates.push_back(rootRate); // #################################### // ###### GTR model priors ###### // Constant nodes ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) ); ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) ); // Stochastic nodes StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) ); StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ); DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) ); std::cout << "Q:\t" << q->getValue() << std::endl; // ####### Gamma Rate Het. ###### ConstantNode<double> *shapePr = new ConstantNode<double>("gammaShPr", new double(0.5)); StochasticNode<double> *srAlpha = new StochasticNode<double>("siteRates.alpha", new ExponentialDistribution(shapePr)); ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) ); DeterministicNode<double> *q1Value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(srAlpha, srAlpha) ) ); ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) ); DeterministicNode<double> *q2Value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(srAlpha, srAlpha) ) ); ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) ); DeterministicNode<double> *q3Value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(srAlpha, srAlpha) ) ); ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) ); DeterministicNode<double> *q4Value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(srAlpha, srAlpha) ) ); std::vector<const TypedDagNode<double>* > gammaRates = std::vector<const TypedDagNode<double>* >(); gammaRates.push_back(q1Value); gammaRates.push_back(q2Value); gammaRates.push_back(q3Value); gammaRates.push_back(q4Value); DeterministicNode<std::vector<double> > *siteRates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gammaRates) ); DeterministicNode<std::vector<double> > *siteRatesNormed = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(siteRates) ); tau->setValue( trees[0] ); std::cout << "tau:\t" << tau->getValue() << std::endl; std::cout << " ** origin " << origin->getValue() << std::endl; std::cout << " ** root age " << trees[0]->getRoot().getAge() << std::endl; GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters()); phyloCTMC->setClockRate( brVector ); phyloCTMC->setRateMatrix( q ); phyloCTMC->setSiteRates( siteRatesNormed ); StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC ); charactermodel->clamp( data[0] ); std::cout << " diversification: " << div->getValue() << std::endl; std::cout << " turnover: " << turn->getValue() << std::endl; std::cout << " birth rate: " << birthRate->getValue() << std::endl; std::cout << " death rate: " << deathRate->getValue() << std::endl; /* add the moves */ RbVector<Move> moves; moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 1.0, true ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turn, 1.0), 1.0, true ) ); // moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) ); // moves.push_back( new NarrowExchange( tau, 10.0 ) ); // moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) ); // moves.push_back( new SubtreeScale( tau, 5.0 ) ); // moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) ); moves.push_back( new RootTimeSlide( tau, 50.0, true, 10.0 ) ); moves.push_back( new OriginTimeSlide( origin, tau, 50.0, true, 10.0 ) ); moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) ); moves.push_back( new SimplexMove( er, 450.0, 6, 0, true, 2.0, 0.5 ) ); moves.push_back( new SimplexMove( pi, 250.0, 4, 0, true, 2.0, 0.5 ) ); moves.push_back( new SimplexMove( er, 200.0, 1, 0, false, 0.5 ) ); moves.push_back( new SimplexMove( pi, 100.0, 1, 0, false, 0.5 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(srAlpha, log(2.0)), 1, true ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(bmNu, 0.75), 4, true ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 0.5), 2, false ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 1.0), 2, false ) ); moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 1.0, false, 8.0 * (double)numNodes) ); moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 2.0, false, 8.0 * (double)numNodes) ); moves.push_back( new RateAgeACLNMixingMove( treeAndRates, 0.02, false, 2.0 ) ); // add some tree stats to monitor DeterministicNode<double> *meanNdRate = new DeterministicNode<double>("MeanNodeRate", new MeanVecContinuousValStatistic(nodeRates) ); /* add the monitors */ RbVector<Monitor> monitors; std::vector<DagNode*> monitoredNodes; monitoredNodes.push_back( meanNdRate ); monitoredNodes.push_back( treeHeight ); monitoredNodes.push_back( origin ); monitoredNodes.push_back( nodeRates ); monitoredNodes.push_back( rootRate ); monitoredNodes.push_back( bmNu ); monitoredNodes.push_back( scaleRate ); monitors.push_back( new ScreenMonitor( monitoredNodes, 10, "\t" ) ); monitoredNodes.push_back( div ); monitoredNodes.push_back( turn ); monitoredNodes.push_back( birthRate ); monitoredNodes.push_back( deathRate ); monitoredNodes.push_back( pi ); monitoredNodes.push_back( er ); monitoredNodes.push_back( srAlpha ); monitoredNodes.push_back( brVector ); std::string logFN = "clock_test/test_rb_ACLN_6June_rn_3.log"; monitors.push_back( new FileMonitor( monitoredNodes, 10, logFN, "\t" ) ); std::set<DagNode*> monitoredNodes2; monitoredNodes2.insert( tau ); // std::string treFN = "clock_test/test_rb_ACLN_6June_pr.tre"; // monitors.push_back( new FileMonitor( monitoredNodes2, 10, treFN, "\t", false, false, false ) ); /* instantiate the model */ Model myModel = Model(q); mcmcGenerations = 200000; /* instiate and run the MCMC */ Mcmc myMcmc = Mcmc( myModel, moves, monitors ); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); /* clean up */ // delete div; // delete turn; // delete rho; // delete cp; // delete branchRates; // delete q; // delete tau; delete charactermodel; // delete a; // delete birthRate; // delete phyloCTMC; // delete dLambda; monitors.clear(); moves.clear(); return true; }
bool TestGtrGammaModel::run( void ) { /* First, we read in the data */ // the matrix NclReader& reader = NclReader::getInstance(); std::vector<AbstractCharacterData*> data = reader.readMatrices(alignmentFilename); std::cout << "Read " << data.size() << " matrices." << std::endl; std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename ); std::cout << "Read " << trees.size() << " trees." << std::endl; std::cout << trees[0]->getNewickRepresentation() << std::endl; /* set up the model graph */ ////////////////////// // first the priors // ////////////////////// // birth-death process priors StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("", new double(0.0)), new ConstantNode<double>("", new double(100.0)) )); ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0)); ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0)); // gtr model priors ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) ); ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) ); std::cout << "bf:\t" << bf->getValue() << std::endl; std::cout << "e:\t" << e->getValue() << std::endl; // then the parameters StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) ); StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ); //Rate heterogeneity ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) ); ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) ); alpha->setValue( new double(0.5) ); std::cout << "alpha:\t" << alpha->getValue() << std::endl; ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) ); DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) ); // StochasticNode<double> *q1_value = new StochasticNode<double>("q1_value", new GammaDistribution(alpha, alpha) ); ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) ); DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) ); // StochasticNode<double> *q2_value = new StochasticNode<double>("q2_value", new GammaDistribution(alpha, alpha) ); ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) ); DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) ); // StochasticNode<double> *q3_value = new StochasticNode<double>("q3_value", new GammaDistribution(alpha, alpha) ); ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) ); DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) ); // StochasticNode<double> *q4_value = new StochasticNode<double>("q4_value", new GammaDistribution(alpha, alpha) ); std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >(); gamma_rates.push_back(q1_value); gamma_rates.push_back(q2_value); gamma_rates.push_back(q3_value); gamma_rates.push_back(q4_value); DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) ); // currently unused // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) ); DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) ); pi->setValue( new std::vector<double>(4,1.0/4.0) ); er->setValue( new std::vector<double>(6,1.0/6.0) ); std::cout << "pi:\t" << pi->getValue() << std::endl; std::cout << "er:\t" << er->getValue() << std::endl; std::cout << "rates:\t" << site_rates->getValue() << std::endl; std::cout << "rates:\t" << site_rates_norm->getValue() << std::endl; DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) ); std::cout << "Q:\t" << q->getValue() << std::endl; std::vector<std::string> names = data[0]->getTaxonNames(); ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) ); std::vector<RevBayesCore::Taxon> taxa; for (size_t i = 0; i < names.size(); ++i) { taxa.push_back( Taxon( names[i] ) ); } StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) ); tau->setValue( trees[0] ); std::cout << "tau:\t" << tau->getValue() << std::endl; // and the character model // (unused) size_t numChar = data[0]->getNumberOfCharacters(); GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters()); phyloCTMC->setSiteRates( site_rates_norm ); phyloCTMC->setRateMatrix( q ); StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC ); charactermodel->clamp( static_cast<DiscreteCharacterData<DnaState> *>( data[0] ) ); std::cout << "LnL:\t\t" << charactermodel->getLnProbability() << std::endl; /* add the moves */ RbVector<Move> moves; // moves.push_back( new ScaleMove(div, 1.0, true, 2.0) ); // moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) ); // moves.push_back( new NarrowExchange( tau, 10.0 ) ); // moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) ); // moves.push_back( new SubtreeScale( tau, 5.0 ) ); // moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) ); // moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) ); // moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) ); // moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) ); // moves.push_back( new SimplexMove( pi, 10.0, 1, 0, true, 2.0 ) ); // moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) ); // moves.push_back( new SimplexMove( pi, 100.0, 4, 0, true, 2.0 ) ); moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 1, true) ); // moves.push_back( new ScaleMove(q1_value, 1.0, true, 2.0) ); // moves.push_back( new ScaleMove(q2_value, 1.0, true, 2.0) ); // moves.push_back( new ScaleMove(q3_value, 1.0, true, 2.0) ); // moves.push_back( new ScaleMove(q4_value, 1.0, true, 2.0) ); // add some tree stats to monitor DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) ); /* add the monitors */ RbVector<Monitor> monitors; std::set<DagNode*> monitoredNodes; // monitoredNodes.insert( er ); // monitoredNodes.insert( pi ); // monitoredNodes.insert( q ); // monitoredNodes.insert( q1_value ); // monitoredNodes.insert( q2_value ); // monitoredNodes.insert( q3_value ); // monitoredNodes.insert( q4_value ); monitoredNodes.insert( site_rates_norm ); monitoredNodes.insert( alpha ); monitoredNodes.insert( treeHeight ); monitors.push_back( new FileMonitor( monitoredNodes, 1000, "TestGtrGammaModelSubstRates.log", "\t" ) ); monitors.push_back( new ScreenMonitor( monitoredNodes, 1000, "\t" ) ); std::set<DagNode*> monitoredNodes2; monitoredNodes2.insert( tau ); monitors.push_back( new FileMonitor( monitoredNodes2, 1000, "TestGtrGammaModel.tree", "\t", false, false, false ) ); /* instantiate the model */ Model myModel = Model(q); /* instiate and run the MCMC */ Mcmc myMcmc = Mcmc( myModel, moves, monitors ); myMcmc.run(mcmcGenerations); myMcmc.printOperatorSummary(); /* clean up */ // for (size_t i = 0; i < 10; ++i) { // delete x[i]; // } // delete [] x; delete div; // delete sigma; // delete a; // delete b; // delete c; std::cout << "Finished GTR+Gamma model test." << std::endl; return true; }