Ejemplo n.º 1
0
bool TestSequenceSimulation::run( void ) {
    
    /* First, we read in the data */
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
    /* set up the model graph */
    
//    std::vector<std::string> names = trees[0]->getTaxonNames();
    ConstantNode<TimeTree> *tree = new ConstantNode<TimeTree>( "tau", new TimeTree( *trees[0] ) );
            
    ConstantNode<RateMatrix> *q = new ConstantNode<RateMatrix>( "Q", new RateMatrix_JC(4) );
        
    ConstantNode<double> *clockRate = new ConstantNode<double>("clockRate", new double(1.0) );
    
    // and the character model
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tree, 4, true, 100);
    phyloCTMC->setClockRate( clockRate );
    phyloCTMC->setRateMatrix( q );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
    
    // write the simulated sequence
    FastaWriter writer;
    writer.writeData( "primatesSimulated.fas", charactermodel->getValue() );
    
    return true;
}
Ejemplo n.º 2
0
void RJumpSplineFactory::makeSampler(std::set<StochasticNode*> &nodes, Graph const &graph,
				     std::vector<Sampler*> &samplers) const
{
    std::set<StochasticNode*> nodesThatWillBeSampled;
    for(set<StochasticNode*>::iterator p(nodes.begin()); p != nodes.end(); ++p)
    {
	if(canSample(*p, graph) && nodesThatWillBeSampled.find(*p) == nodesThatWillBeSampled.end())
	{
	    StochasticNode *n = *p;
	    StochasticNode *tmpNode = 0;
	    
	    std::vector<StochasticNode*> vnode;
	    vnode.push_back(n);
	    nodesThatWillBeSampled.insert(n);
	    
	    
	    if(n->parents()[1]->dim()[0] == 2)
	    {
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]->parents()[0]));
		vnode.push_back(tmpNode);
		nodesThatWillBeSampled.insert(tmpNode);
		
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]->parents()[1]));
		vnode.push_back(tmpNode);
	       	nodesThatWillBeSampled.insert(tmpNode);
	    } else {
		
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]));
		vnode.push_back(tmpNode);
		nodesThatWillBeSampled.insert(tmpNode);
	    }

	    samplers.push_back(new RJumpSpline(vnode, graph));
	    vnode.clear();
	    
	}
    }
    
    for(set<StochasticNode*>::iterator p(nodesThatWillBeSampled.begin()); p != nodesThatWillBeSampled.end();)
    {
	nodes.erase(*p);
	++p;
    }   


}
Ejemplo n.º 3
0
vector<Sampler*> RJumpSplineFactory::makeSamplers(set<StochasticNode*> const &nodes, Graph const &graph) const
{
    vector<Sampler*> samplers;
    set<StochasticNode*> nodesThatWillBeSampled;
    for(set<StochasticNode*>::iterator p(nodes.begin()); p != nodes.end(); ++p)
    {
	if(canSample(*p, graph) && nodesThatWillBeSampled.find(*p) == nodesThatWillBeSampled.end())
	{
	    StochasticNode *n = *p;
	    StochasticNode *tmpNode = 0;
	    
	    vector<StochasticNode*> vnode;
	    vnode.push_back(n);
	    nodesThatWillBeSampled.insert(n);
	    
	    
	    if(n->parents()[1]->dim()[0] == 2)
	    {
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]->parents()[0]));
		vnode.push_back(tmpNode);
		nodesThatWillBeSampled.insert(tmpNode);
		
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]->parents()[1]));
		vnode.push_back(tmpNode);
	       	nodesThatWillBeSampled.insert(tmpNode);
	    } else {
		
		tmpNode = const_cast<StochasticNode *>(dynamic_cast<StochasticNode const *>(n->parents()[1]));
		vnode.push_back(tmpNode);
		nodesThatWillBeSampled.insert(tmpNode);
	    }
	    
	    samplers.push_back(new RJumpSpline(new GraphView(vnode, graph, true)));
	    vnode.clear();
	    
	}
    }

    return samplers;
}
Ejemplo n.º 4
0
bool TestSimplexMove::run( void ) {
    
    /* set up the model graph */
    
    // first the priors on mu 
    ConstantNode<std::vector<double> > *a = new ConstantNode<std::vector<double> >( "a", new std::vector<double>(4,50) );
    // then x
    StochasticNode<std::vector<double> > *x = new StochasticNode<std::vector<double> >( "x", new DirichletDistribution(a) );
    
    std::vector<double> *x_val = new std::vector<double>();
    x_val->push_back(0.01);
    x_val->push_back(0.02);
    x_val->push_back(0.02);
    x_val->push_back(0.95);
    x->setValue( x_val );
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new SimplexMove( x, 100.0, 4, 0, false, 1.0 ) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    monitors.push_back( new FileMonitor( x, 1, "SimplexMoveTest.log", "\t" ) );
    
    /* instantiate the model */
    Model myModel = Model(a);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    
    /* clean up */
    delete x;
    delete a;
    
    return true;
}
bool TestGtrGammaLikelihood::run( void ) {
    
    /* First, we read in the data */
    // the matrix
    NclReader reader = NclReader();
    std::vector<AbstractCharacterData*> data = reader.readMatrices(alignmentFilename);
	
	AbstractDiscreteCharacterData *discrD = dynamic_cast<AbstractDiscreteCharacterData* >(data[0]);
    
	std::cout << "Read " << data.size() << " matrices." << std::endl;
    
    std::vector<TimeTree*> trees = NclReader().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
        
    // then the parameters
    ConstantNode<RbVector<double> > *pi = new ConstantNode<RbVector<double> >( "pi", new RbVector<double>(4, 1.0/4.0) );
    ConstantNode<RbVector<double> > *er = new ConstantNode<RbVector<double> >( "er", new RbVector<double>(6, 1.0/6.0) );
    
    //Rate heterogeneity
    ConstantNode<double> *alpha = new ConstantNode<double>("alpha", new double(0.5) );
    
    std::cout << "alpha:\t" << alpha->getValue() << std::endl;
    
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) );
    
//    ConstantNode<double> *q1_value = new ConstantNode<double>("q1_value", new double(1.0) );
//    ConstantNode<double> *q2_value = new ConstantNode<double>("q2_value", new double(1.0) );
//    ConstantNode<double> *q3_value = new ConstantNode<double>("q3_value", new double(1.0) );
//    ConstantNode<double> *q4_value = new ConstantNode<double>("q4_value", new double(1.0) );
    
    
    std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >();
    gamma_rates.push_back(q1_value);
    gamma_rates.push_back(q2_value);
    gamma_rates.push_back(q3_value);
    gamma_rates.push_back(q4_value);
    
    DeterministicNode<RbVector<double> > *site_rates = new DeterministicNode<RbVector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) );
    ConstantNode<double> *sumNV = new ConstantNode<double>("sumnv", new double(1.0) );
//    ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) );
        
//    std::cout << "pi:\t" << pi->getValue() << std::endl;
//    std::cout << "er:\t" << er->getValue() << std::endl;
    std::cout << "rates:\t" << site_rates->getValue() << std::endl;
    
    DeterministicNode<RbVector<double> > *site_rates_norm = new DeterministicNode<RbVector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates, sumNV) );
    std::cout << "rates:\t" << site_rates_norm->getValue() << std::endl;
    
    DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
    
    std::cout << "Q:\t" << q->getValue() << std::endl;
    
    ConstantNode<TimeTree> *tau = new ConstantNode<TimeTree>( "tau", new TimeTree( *trees[0] ) );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
    size_t numChar = data[0]->getNumberOfCharacters();
//    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, numChar );
    PhyloCTMCSiteHomogeneousNucleotide<DnaState, TimeTree> *charModel = new PhyloCTMCSiteHomogeneousNucleotide<DnaState, TimeTree>(tau, true, numChar );
    charModel->setRateMatrix( q );
    charModel->setSiteRates( site_rates_norm );
//    charModel->setClockRate( clockRate );
    
    StochasticNode< AbstractDiscreteCharacterData > *charactermodel = new StochasticNode< AbstractDiscreteCharacterData >("S", charModel );
    charactermodel->clamp( discrD );
    
    std::cout << "BEAST LnL:\t\t\t\t" << -6281.4026 << std::endl;
    std::cout << "RevBayes LnL:\t\t" << charactermodel->getLnProbability() << std::endl;
    
    std::cout << "Finished GTR+Gamma model test." << std::endl;
    
    return true;
}
bool TestAutocorrelatedBranchHeterogeneousGtrModel::run( void ) {
    
    // fix the rng seed
    std::vector<unsigned int> seed;
    seed.push_back(25);
    seed.push_back(42);
    GLOBAL_RNG->setSeed(seed);
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    std::cout << data[0] << std::endl;
    
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    // gtr model priors
    ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    
    //Root frequencies
    StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bf) );
    
    
    StochasticNode<std::vector<double> > * er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) ;
    
    std::cout << "bf:\t" << bf->getValue() << std::endl;
    std::cout << "e:\t" << e->getValue() << std::endl;
    
    
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    std::vector<StochasticNode < std::vector<double> >* > pis;
    //  std::vector<StochasticNode < std::vector<double> >* > ers;
    std::vector< const TypedDagNode < RateMatrix>* > qs;
    
    ConstantNode<double> *alpha_prior_shape = new ConstantNode< double >("alpha_prior_shape", new double( 5.0 ) );
    ConstantNode<double> *alpha_prior_rate = new ConstantNode< double >("alpha_prior_rtae", new double( 0.5 ) );
    StochasticNode<double> *alpha = new StochasticNode<double>("alpha", new GammaDistribution( alpha_prior_shape, alpha_prior_rate ) );
    
    ConstantNode<double> *beta_prior_shape1 = new ConstantNode< double >("beta_prior_shape1", new double( 2.0 ) );
    ConstantNode<double> *beta_prior_shape2 = new ConstantNode< double >("beta_prior_shape2", new double( 5.0 ) );
    StochasticNode<double> *beta = new StochasticNode<double>("beta", new BetaDistribution( beta_prior_shape1, beta_prior_shape2 ) );
    
    StochasticNode< RbVector<RateMatrix> > *perBranchQ = new StochasticNode< RbVector< RateMatrix > >( "autocorrBranchRate", new AutocorrelatedBranchMatrixDistribution( tau, beta, rf, er, alpha ) );

    //    StochasticNode< std::vector<RateMatrix> > *perBranchQ = new StochasticNode< std::vector< RateMatrix > >( "autocorrBranchRate", new DPP< RateMatrix >( tau, ... ) );
    
//    
//    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
//        std::ostringstream pi_name;
//        pi_name << "pi(" << i << ")";
//        pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bf) ) );
//        //  ers.push_back(new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) );
//        std::ostringstream q_name;
//        q_name << "q(" << i << ")";
//        qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new GtrRateMatrixFunction(er, pis[i]) ));
//        std::cout << "Q:\t" << qs[i]->getValue() << std::endl;
//    }
    
    
    // and the character model
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setRootFrequencies( rf );
    phyloCTMC->setRateMatrix( perBranchQ );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
    charactermodel->clamp( data[0] );
    
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
    moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( rf, 10.0, 1, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( rf, 100.0, 4, 0, true, 2.0 ) );
    
//    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
//        //     moves.push_back( new SimplexMove( ers[i], 10.0, 1, true, 2.0 ) );
//        moves.push_back( new SimplexMove( pis[i], 10.0, 1, true, 2.0 ) );
//        //    moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) );
//        moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) );
//    }
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    //    monitoredNodes.insert( er );
    //    monitoredNodes.insert( pi );
    monitoredNodes.insert( div );
    monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.log", "\t" ) );
    std::set<DagNode*> monitoredNodes1;
    monitoredNodes1.insert( er );
    /*    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
     monitoredNodes1.insert( pis[i] );
     }*/
    monitoredNodes1.insert( rf );
    monitoredNodes1.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestAutocorrelatedBranchHeterogeneousGtrModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model( tau );
    
    std::vector<DagNode*> &nodes = myModel.getDagNodes();
    for (std::vector<DagNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
        std::cerr << (*it)->getName() << std::endl;
    }
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished Autocorrelated Branch Heterogeneous GTR model test." << std::endl;
    
    return true;
}
bool TestBranchHeterogeneousHkyModel::run( void ) {
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    std::cout << data[0] << std::endl;
        
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    
    // hky model priors
    ConstantNode<std::vector<double> > *bfPrior = new ConstantNode<std::vector<double> >( "bfPrior", new std::vector<double>(4,1.0) );
    ConstantNode< double > *tstvPrior = new ConstantNode< double >( "tstvPrior", new double(1.0) );
	
    // root frequencies
    StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bfPrior) );
	
	
	
//    // first the hyper-priors of the clock model
    ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) );
    ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) );
//	
//    
    // then the parameters
    ContinuousStochasticNode *expectLN = new ContinuousStochasticNode( "UCLN.expectation", new ExponentialDistribution(a) ); // the expectation of the LN dist so mu = log(expectLN) - (sigLN^2)/2
    ContinuousStochasticNode *sigLN = new ContinuousStochasticNode("UCLN.variance", new ExponentialDistribution(b) );	
    DeterministicNode<double> *logExpLN = new DeterministicNode<double>("logUCLN.exp", new LnFunction(expectLN) );
   DeterministicNode<double> *squareSigLN = new DeterministicNode<double>("squareSigLN", new BinaryMultiplication<double, double, double>(sigLN, sigLN) );
   DeterministicNode<double> *divSqSigLN = new DeterministicNode<double>("divSqSigLN", new BinaryDivision<double, double, double>(squareSigLN, new ConstantNode<double>( "2", new double (2.0))) );
   DeterministicNode<double> *muValLN = new DeterministicNode<double>("MuValLN", new BinarySubtraction<double, double, double>(logExpLN, divSqSigLN) );
	
   
    //Declaring a vector of HKY matrices
	size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2;
    std::vector<StochasticNode < std::vector<double> >* > pis;
    std::vector< const TypedDagNode< RateMatrix >* > qs;
	StochasticNode < double >* tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution( tstvPrior ) );
//
//	
    // declaring a vector of clock rates
	std::vector<const TypedDagNode<double> *> branchRates;
	std::vector< ContinuousStochasticNode *> branchRates_nonConst;
    	
	
	
	
    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
        // construct the per branch rate matrix
        std::ostringstream pi_name;
        pi_name << "pi(" << i << ")";
        pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bfPrior) ) );
        std::ostringstream q_name;
        q_name << "q(" << i << ")";
        qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new HkyRateMatrixFunction( tstv, pis[i]) ));
        std::cout << "Q:\t" << qs[i]->getValue() << std::endl;        
        
       // construct the per branch clock rate
       std::ostringstream br_name;
        br_name << "br(" << i << ")";
		ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(muValLN, sigLN, new ConstantNode<double>("offset", new double(0.0) )));
		branchRates.push_back( tmp_branch_rate );
		branchRates_nonConst.push_back( tmp_branch_rate );
	}
    // build the vector containing all rates/rate-matrices
    // instead of independent rates/rate-matrices we could have used anything that specifies a distribution on a set of values
    // e.g. a mixture, DPP or an autocorrelated model
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );
    DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) );
    
    
    // create the variables for the rate variation across sites
    // we use the standard 4 categorical gamma rate variation
    // though, any other rates could be used too as long as they are normalized
    ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) );
    ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) );
    
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) );
    std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >();
    gamma_rates.push_back(q1_value);
    gamma_rates.push_back(q2_value);
    gamma_rates.push_back(q3_value);
    gamma_rates.push_back(q4_value);
    
    DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) );
    DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) );
    // we actually do not use different probabilities per rate (yet!)
   // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) );
    
    
	
    // create the stochastic node for the tree
    // we use a birth-death process prior and thus a time-tree
    // we could use as well an unrooted tree
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( 2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
	
//	//rescale the tree so that its root age is 1
	TimeTree *t = tau->getValue().clone();
	const TopologyNode &root = t->getRoot();
	TreeUtilities::rescaleTree(t, &t->getRoot(), 1.0 / root.getAge());
//	
    tau->setValue( t );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
    //GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, true, data[0]->getNumberOfCharacters() );
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters() );
    // set the branch heterogeneous substitution matrices
    // if you set instead of a vector a single matrix, then you get a homogeneous model
    charModel->setRateMatrix( qs_node );
    charModel->setRootFrequencies( rf );
    // set the per branch clock rates
    // if you instead specify a single rate, you get a strict clock model
    charModel->setClockRate( br_vector );
    // specify the rate variation across sites
    // if you skip this then you get the model without rate variation across sites.
    charModel->setSiteRates( site_rates_norm );
	
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", charModel );
    charactermodel->clamp( data[0] );
    
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//Fixintg the root age at 1:
	//  moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
	//    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
//test: only 20 instead of 30
    moves.push_back( new NodeTimeSlideUniform( tau, 20.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) );
    moves.push_back( new SimplexSingleElementScale( rf, 10.0, true, 2.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 2, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(expectLN, 1.0), 2, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(sigLN, 1.0), 2, true ) );
	std::vector<StochasticNode<double> * > rates;
	for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
		rates.push_back( branchRates_nonConst[i] );
	}
	moves.push_back( new RateAgeBetaShift( tau, rates, 1.0, true, 10.0) );                                                         //!<  constructor

	
	
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        moves.push_back( new SimplexSingleElementScale( pis[i], 10.0, true, 2.0 ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), 1, true ) );
    }
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    monitoredNodes.insert( tstv );
    monitoredNodes.insert( treeHeight );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestBranchHeterogeneousHkyModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(qs[0]);
	
    monitors.push_back( new ModelMonitor( 10, "TestBranchHeterogeneousHkyModel.log", "\t" ) );
	
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
//	myMcmc.burnin(1000, 100);
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished GTR model test." << std::endl;
    
    return true;
}
Ejemplo n.º 8
0
bool TestAdmixtureGraph::run(void) {
    
    std::cout << "Running TestAdmixtureGraph\n";
    
    std::cout << "argc: " << argc << "\n";
    if (argc > 1)
    {
        for (int i = 0; i < argc; i++)
        {
            argTokens.push_back(argv[i]);
            std::cout << i << " " << argTokens[i] << "\n";
        }
        std::cout << argc << " == " << argTokens.size() << " arguments\n";
        snpFilename = argTokens[1];
    }
    
    // MODEL GRAPH
    std::vector<unsigned int> seed;
    seed = GLOBAL_RNG->getSeed();
    std::cout << "seed " << seed[0] << " " << seed[1] << "\n";
    //seed.clear(); seed.push_back(53866); seed.push_back(21201); GLOBAL_RNG->setSeed(seed);
    // 53866 21201
    
    // read in data
    std::string fn = snpFilepath + snpFilename;
    int snpThinBy = 100;
    SnpData* snps = PopulationDataReader().readSnpData(fn,snpThinBy);
    
    // read in tree
    std::vector<AdmixtureTree*> trees;
    bool startTree = false;
    //treeFilename = "";
    if (treeFilename != "")
    {
        // NclReader does not seem to work for Newick strings at this time
        /*
        trees = NclReader::getInstance().readAdmixtureTrees( snpFilepath + treeFilename, "newick" );
        std::cout << "Read " << trees.size() << " trees." << std::endl;
        std::cout << trees[0]->getNewickRepresentation() << std::endl;
         */
        
        // hacky workaround for now...
        //std::string newickStr = "(San:1,((Han:0.348958,Dai:0.348958):0.194964,(((Ket:0.351687,(Koryak:0.344761,(SiberianEskimo:0.325112,(((Huichol:0.269256,(Pima:0.233362,((Karitiana:0.104362,Aymara:0.104362):0.0120051,(Yukpa:0.100877,Mayan:0.100877):0.0154902):0.116995):0.0358943):0.0341607,Athabascan:0.303417):0.0128117,((EastGreenland:0.158699,WestGreenland:0.158699):0.0369703,Aleuts:0.195669):0.120559):0.00888335):0.0196487):0.0069259):0.0987521,((Nivhks:0.278599,Buryat:0.278599):0.0642882,Yakut:0.342887):0.107551):0.0756044,Altai:0.526043):0.0178791):0.456078)";
        //std::string newickStr = "((A:.5,B:.5):.5,C:.5)";
        std::string newickStr = "(San:1,((Koryak:0.926938,(SiberianEskimo:0.812519,((Aleuts:0.762302,(EastGreenland:0.4824,WestGreenland:0.4824):0.279902):0.0144746,((Huichol:0.353522,(Pima:0.324226,((Mayan:0.223067,Yukpa:0.223067):0.0856916,(Aymara:0.173581,Karitiana:0.173581):0.135178):0.015467):0.0292956):0.109212,Athabascan:0.462734):0.314043):0.0357419):0.114419):0.0600046,(Ket:0.772384,((Altai:0.640188,(Han:0.481097,Dai:0.481097):0.159091):0.0445788,((Buryat:0.596074,Nivhks:0.596074):0.015206,Yakut:0.61128):0.0734876):0.0876171):0.214558):0.0130575);";
        NewickConverter nc;
        BranchLengthTree* blt = nc.convertFromNewick(newickStr);
        AdmixtureTree* at = TreeUtilities::convertToAdmixtureTree(*blt, snps->getPopulationNames());
        at->setNames(snps->getPopulationNames());
        at->updateTipOrderByNames(snps->getPopulationNames());
        trees.push_back(at);
        startTree = true;
    }
    
    size_t numTaxa = snps->getNumPopulations();
    size_t numNodes = 2 * numTaxa - 1;
    size_t numBranches = numNodes - 1;
    //size_t numSites = snps->getNumSnps();
    int blockSize = 5000;
    
    double divGens = 1;//.01;
    int delay = 1000;
    int numTreeResults = 500;
    int numAdmixtureResults = 500;
    int maxNumberOfAdmixtureEvents = 1;
    double residualWeight = 2.0;
    
    bool useWishart = true;             // if false, the composite likelihood function is used
    bool useBias = true;               // if false, no covariance bias correction for small sample size is used
    bool useAdmixtureEdges = true;      // if false, no admixture moves or edges are used
    bool useBranchRates = true;         // if false, all populations are of the same size
    bool allowSisterAdmixture = true;   // if false, admixture events cannot be between internal lineages who share a divergence parent
    bool discardNonPosDefMtx = true;    // if false, round negative eigenvalues to positive eps
    bool useContrasts = false;          // nothing really, need to remove
    bool updateParameters = true;
    bool updateTopology = true;
    bool updateNodeAges = true;
    
    bool useParallelMcmcmc = true;
    int numChains = 4;
    int numProcesses = numChains;
//    numProcesses=80;
    int swapInterval = 1;
    double deltaTemp = .1;
    double sigmaTemp = 1.0;
    double hottestTemp = 0.001;
    if (!true)
    {
        deltaTemp = exp(-log(hottestTemp)/pow(numChains-1,sigmaTemp)) - 1;
        std::cout << deltaTemp << "\n";
    }
    
    double startingHeat = 1.0;
    double likelihoodScaler = 1.0;

    std::stringstream rndStr;
    rndStr << std::setw(9) << std::fixed << std::setprecision(0) << std::setfill('0') << std::floor(GLOBAL_RNG->uniform01()*1e9);
    // std::string outName = "papa." + rndStr.str();
    std::string simName = "hgdp";
    std::string outName = simName + "." + rndStr.str();
//    std::string outName = simName + "." + rndStr.str() + "." + snpFilename;
    
    // BM diffusion rate
    ConstantNode<double>* a_bm = new ConstantNode<double>( "bm_a", new double(3));
    ConstantNode<double>* b_bm = new ConstantNode<double>( "bm_b", new double(100));
    //ConstantNode<double>* c_bm = new ConstantNode<double>( "bm_c", new double(0));
    //ConstantNode<double>* d_bm = new ConstantNode<double>( "bm_d", new double(100));
    StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new ExponentialDistribution(b_bm));
    //StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new UniformDistribution(c_bm, d_bm));

    // CPP rate
    // MJL 071713:  Flat Poisson prior cannot overpower model overfitting when lnL is large.
    //              Consider implementing Conway-Maxewell-Poisson distn instead.
    
    // This prior requires admixture events to improve lnL by N units
    // Negative values -> admixture rare
    // Positive values -> admixture common (set admixture cap)
    double adm_th_lnL = 10;
    double rate_cpp_prior = exp(adm_th_lnL);
    
    ConstantNode<double>* c = new ConstantNode<double>( "c", new double(1.0/rate_cpp_prior)); // admixture rate prior
    StochasticNode<double>* admixtureRate = new StochasticNode<double> ("rate_CPP", new ExponentialDistribution(c));
    StochasticNode<int>* admixtureCount = new StochasticNode<int> ("count_CPP", new PoissonDistribution(admixtureRate));
    admixtureCount->clamp(new int(0));
    admixtureRate->clamp(new double(rate_cpp_prior));
    if (!useAdmixtureEdges)
    {
        admixtureRate->clamp(new double(rate_cpp_prior));
        admixtureCount->clamp(new int(0));
    }
    
    
    // birth-death process for ultrametric tree
    StochasticNode<double>* diversificationRate = new StochasticNode<double>("div", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(50.0)) ));
    StochasticNode<double>* turnover = new StochasticNode<double>("turnover", new UniformDistribution(new ConstantNode<double>("do_lower", new double(0.0)), new ConstantNode<double>("do_upper", new double(1.0)) ));
    
    // tree node
    StochasticNode<AdmixtureTree>* tau = new StochasticNode<AdmixtureTree>( "tau", new AdmixtureConstantBirthDeathProcess(diversificationRate, turnover, (int)numTaxa, snps->getPopulationNames(), snps->getOutgroup()) );
    if (startTree)
    {
        tau->setValue(new AdmixtureTree(*trees[0]));
        tau->setIgnoreRedraw(true);
    }
    
    // branch multipliers (mutation rate is clocklike, but population sizes are not)
	std::vector<const TypedDagNode<double> *> branchRates;
    std::vector< ContinuousStochasticNode *> branchRates_nonConst;
    ConstantNode<double>* branchRateA = new ConstantNode<double>( "branchRateA", new double(1));
    ConstantNode<double>* branchRateB = new ConstantNode<double>( "branchRateB", new double(2));
    ConstantNode<double>* branchRateC = new ConstantNode<double>( "branchRateC", new double(0));
    ConstantNode<double>* branchRateD = new ConstantNode<double>( "branchRateD", new double(.01));
    //ConstantNode<double>* branchRateE = new ConstantNode<double>( "branchRateE", new double(10));
	for( size_t i=0; i<numBranches; i++){

        std::ostringstream br_name;
        br_name << "br_" << i;
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new ExponentialDistribution(branchRateD) );
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new InverseGammaDistribution(branchRateA, branchRateB));
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new LognormalDistribution(branchRateC, branchRateA));
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new UniformDistribution(branchRateC, branchRateE));
//        ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateD));
        ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateB));
        
		if (!useBranchRates)
        {
            tmp_branch_rate->clamp(new double(1.0));
        }
        
        branchRates.push_back( tmp_branch_rate );
        branchRates_nonConst.push_back( tmp_branch_rate );
        
    }
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );

    
    // model node
    BrownianMotionAdmixtureGraph* bmag = new BrownianMotionAdmixtureGraph( tau, diffusionRate, admixtureRate, br_vector, snps, useWishart, useContrasts, useBias, discardNonPosDefMtx, blockSize, likelihoodScaler );
    StochasticNode<ContinuousCharacterData >* admixtureModel;
    admixtureModel = new StochasticNode<ContinuousCharacterData >("AdmixtureGraph", bmag);
    
    // have to clamp to distinguish likelihood from prior (incidentally calls setValue(), but this is handled otherwise)
    admixtureModel->clamp( new ContinuousCharacterData() ); // does it event matter how it's clamped?
    
    // residuals
    DeterministicNode<std::vector<double> >* residuals = new DeterministicNode<std::vector<double> >("residuals", new BrownianMotionAdmixtureGraphResiduals(admixtureModel));
    
    // MOVES
    std::cout << "Adding moves\n";
        
    // moves vector
    RbVector<Move> moves;

    // model parameters
    if (updateParameters)
    {
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diffusionRate, 0.1), 5, false ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diversificationRate, 0.5), 5, false ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turnover, 0.5), 5, false ) );
    }
    
    
    // non-admixture tree updates
    if (updateTopology)
    {
        moves.push_back( new AdmixtureNarrowExchange( tau, 0.1, numTaxa/2) );
        moves.push_back( new AdmixtureSubtreePruneRegraft( tau, 0.1, numTaxa/4) );
        moves.push_back( new AdmixtureFixedNodeheightPruneRegraft(tau, numTaxa/4));
        
        moves.push_back( new AdmixtureEdgeReplaceNNI( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
        moves.push_back( new AdmixtureEdgeReplaceFNPR( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
        moves.push_back( new AdmixtureEdgeReplaceSubtreeRegraft( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
    }

    if (updateNodeAges)
    {
        for (size_t i = numTaxa; i < numNodes - 1; i++)
        {
            moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 15.0, false, 1.0 ) );
            moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 1.0, false, 0.5 ) );
        }
    }
    
    // branch rate updates
    if (useBranchRates)
    {
        // branch rate multipliers
        for( size_t i=0; i < numBranches; i++)
        {
            moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 0.1), 1, false ) );
            moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), .5, false ) );
        }
        
        // tree rate shift
        moves.push_back( new AdmixtureShiftTreeRates(diffusionRate, branchRates_nonConst, 0.5, false, 2.0));
        
        // shift node age for branch rate
        for (size_t i = numTaxa; i < numNodes - 1; i++)
        {
            if (updateNodeAges)
                moves.push_back( new AdmixtureShiftNodeAgeAndRate(tau, branchRates_nonConst, (int)i, 0.7, false, 1.0) );
            
            // MJL 081513: not working, I think...
            if (updateTopology)
            {
                std::vector<DagNode*> pvec;
                pvec.push_back(tau);
                pvec.push_back(branchRates_nonConst[i]);
                //moves.push_back( new AdmixtureSubtreePruneRegraftAndRateShift(pvec, i, 0.5, 1.0) );
                // ... something wrong with how the lnProb is computed using the lnProb ratios...
            }
        }
        
        // NNI with branch rate modifier (not working quite right, disabled)
        if (updateTopology)
            moves.push_back( new AdmixtureNearestNeighborInterchangeAndRateShift( tau, branchRates_nonConst, 0.1, false, numTaxa));
        
        
    }
        
    // admixture tree updates
    if (useAdmixtureEdges)
    {
    
        moves.push_back( new AdmixtureEdgeAddResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 10.0) );
        moves.push_back( new AdmixtureEdgeRemoveResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 10.0) );
        moves.push_back( new AdmixtureEdgeReplaceResidualWeights( tau, admixtureRate, branchRates_nonConst, residuals, residualWeight, delay, allowSisterAdmixture, 20.0) );
        //moves.push_back( new AdmixtureEdgeMultiRemove( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 2.0 ) );
      
        //moves.push_back( new AdmixtureReplaceAndNNI(  tau, 0.5, 10.0) );
        //moves.push_back( new AdmixtureEdgeAddCladeResiduals( tau, admixtureRate, admixtureCount, residuals, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 2.0) );
        //moves.push_back( new AdmixtureEdgeReplaceCladeResiduals( tau, admixtureRate, branchRates_nonConst, residuals, delay, allowSisterAdmixture, 15.0) );
        
        if (updateTopology)
        {
            moves.push_back( new AdmixtureEdgeDivergenceMerge( tau, admixtureRate, branchRates_nonConst, admixtureCount, residuals, delay, allowSisterAdmixture, 5.0 ));
            moves.push_back( new AdmixtureEdgeRegraftReplace( tau, residuals, 1.0, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 5.0));
            ;
            
        }
        
        moves.push_back( new AdmixtureEdgeReweight( tau, delay, 10.0, 10.0) );
        moves.push_back( new AdmixtureEdgeReversePolarity( tau, delay, 2.0, 10.0) );
        moves.push_back( new AdmixtureEdgeSlide( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) );
        moves.push_back( new AdmixtureEdgeFNPR( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(admixtureRate, 0.1), 5, false ) );

    }
    

    
    // MONITORS
    std::cout << "Adding monitors\n";
    RbVector<Monitor> monitors;
    
    // parameter monitor
    std::vector<DagNode*> monitoredNodes;
    monitoredNodes.push_back( diffusionRate );
    monitoredNodes.push_back( admixtureRate );
    monitoredNodes.push_back( diversificationRate );
    monitoredNodes.push_back( turnover );
    monitoredNodes.push_back( admixtureCount );
    
    if (useBranchRates)
    {
        for( size_t i=0; i<numBranches; i++){
            monitoredNodes.push_back( branchRates_nonConst[i] );
        }
    }
    
    monitors.push_back( new FileMonitor( monitoredNodes, 1, "/Users/mlandis/data/admix/output/" + outName + ".parameters.txt", "\t", true, true, true, useParallelMcmcmc, useParallelMcmcmc, useParallelMcmcmc ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) );
 
    monitors.push_back( new AdmixtureBipartitionMonitor(tau, diffusionRate, br_vector, numTreeResults, numAdmixtureResults, 1, "/Users/mlandis/data/admix/output/" + outName + ".bipartitions.txt", "\t", true, true, true, true, true, true ) );
    monitors.push_back( new AdmixtureResidualsMonitor(residuals, snps->getPopulationNames(), 10, "/Users/mlandis/data/admix/output/" + outName + ".residuals.txt", "\t", true, true, true, true ) );

    //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, true, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".admixture_trees.txt", "\t", true, true, true, true ) );
    //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".topology_trees.trees", "\t", true, true, true, true ) );
    monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, false, 10, "/Users/mlandis/data/admix/output/" + outName + ".time_trees.trees", "\t", true, true, true, true ) );
    
    
    
    
    // MODEL
    std::cout << "Calling model\n";
    std::set<const DagNode*> mset;
    mset.insert(admixtureRate);
    Model myModel = Model(mset);
    
    
    // MCMC
    std::cout << "Calling mcmc\n";
    if (!useParallelMcmcmc)
    {
        Mcmc myMcmc = Mcmc(myModel, moves, monitors);
        myMcmc.run(mcmcGenerations);
        myMcmc.printOperatorSummary();
    }
    else
    {
        ParallelMcmcmc myPmc3(myModel, moves, monitors, "random", numChains, numProcesses, swapInterval, deltaTemp, sigmaTemp, startingHeat);
        myPmc3.run(mcmcGenerations/divGens);
        myPmc3.printOperatorSummary();
    }

    std::cout << "All done!\n";
    
    
    // OBJECT CLEANUP
    delete snps;
    delete a_bm;
    delete b_bm;
    delete c;
    delete tau;
    delete diversificationRate;
    delete turnover;
    delete diffusionRate;
    delete admixtureRate;
    delete admixtureCount;
    delete branchRateA;
    delete branchRateB;
    delete branchRateC;
    delete branchRateD;
    branchRates_nonConst.clear();
    branchRates.clear();
    delete br_vector;
    //delete bmag; // malloc deallocation error
    delete admixtureModel;
    delete residuals;
    
    moves.clear();
    monitors.clear();
    
    return true;
}
bool TestUCLNRelaxedClockBHT92Model::run( void ) {
    
    std::vector<unsigned int> seeds;
    seeds.push_back(7);
    seeds.push_back(4);
    GLOBAL_RNG->setSeed( seeds );
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    std::cout << data[0] << std::endl;
    
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    
	
    // Setting up the substitution model //
	
    //ts/tv ratio:
    ConstantNode<double > *tstv_prior = new ConstantNode<double >( "tstv_prior", new double(0.25) );
    ContinuousStochasticNode *tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution(tstv_prior) );
	
    //GC content prior:
    ConstantNode<double > *eq_gc_prior = new ConstantNode<double >( "eq_gc_prior_ab", new double(1.0) );    
	
    //Root GC frequency
    
    StochasticNode< double  > *omega = new StochasticNode< double >( "omega", new BetaDistribution(eq_gc_prior,eq_gc_prior) );
    DeterministicNode<std::vector<double> > *rf = new DeterministicNode< std::vector<double> >( "rf", new NucleotideFrequenciesFromGcContentFunction( omega ) );
	
    std::cout << "omega:\t" << omega->getValue() << std::endl;
    std::cout << "rf:\t" << rf->getValue() << std::endl;
    std::cout << "tstv:\t" << tstv->getValue() << std::endl;
    
    //Declaring a vector of matrices, one per branch
    size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2;
    std::vector<ContinuousStochasticNode*> thetas;
    std::vector< const TypedDagNode < RateMatrix >* > qs;
	
	//Equilibrium GC frequency: one per branch, defined in the loop along with the T92 rate matrices.
    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
        std::ostringstream eq_gc_name;
        eq_gc_name << "eq_gc(" << i << ")";
        thetas.push_back(new ContinuousStochasticNode( eq_gc_name.str(), new BetaDistribution(eq_gc_prior,eq_gc_prior) ) );
		std::ostringstream q_name;
        q_name << "q(" << i << ")";
		qs.push_back(new DeterministicNode< RateMatrix >( q_name.str(), new Tamura92RateMatrixFunction( thetas[i], tstv) ));
        //std::cout << "Matrix Q:\t"<<i<<"\t" << qs[i]->getValue() << std::endl;
    }
    
	//Build a node out of the vector of nodes
    DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) );
    
	
	// Setting up the relaxed clock model //

    ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) );
    ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) );

	
	std::vector<const TypedDagNode<double> *> branchRates;
	std::vector< ContinuousStochasticNode *> branchRates_nonConst;
	for( size_t i=0; i<numBranches; i++){
        std::ostringstream br_name;
        br_name << "br(" << i << ")";
		ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(a, b, new ConstantNode<double>("offset", new double(0.0) )));
		branchRates.push_back( tmp_branch_rate );
		branchRates_nonConst.push_back( tmp_branch_rate );
	}
	//Build a node out of the vector of nodes
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );

	
	
	// Putting it all together //

	
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, div, turn, rho, "uniform", "survival", int(names.size()), names, std::vector<Clade>()) );
    
	//If we want to get a good starting tree
	//    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
	//    StochasticNode<CharacterData<DnaState> > *charactermodel = new StochasticNode<CharacterData <DnaState> >("S", new SimpleGTRBranchRateTimeCharEvoModel<DnaState, TimeTree>(tau, q, br_vector, true, data[0]->getNumberOfCharacters()) );
    
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setRootFrequencies( rf );
    phyloCTMC->setRateMatrix( qs_node );
    phyloCTMC->setClockRate( br_vector );
	StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
	charactermodel->clamp( data[0] );
    	
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
    moves.push_back( new BetaSimplexMove( omega, 10.0, true, 2.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) );
	
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        moves.push_back( new BetaSimplexMove( dynamic_cast<StochasticNode<double>* >(thetas[i]), 10.0, true, 2.0 ) );
        moves.push_back( new SlidingMove( thetas[i], 0.05, true, 2.0) );
        //    moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) );
		//        moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) );
    }
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    //    monitoredNodes.insert( er );
    //    monitoredNodes.insert( pi );
    monitoredNodes.insert( div );
    monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestUCLNRelaxedClockBHT92Model.log", "\t" ) );
    std::set<DagNode*> monitoredNodes1;
	//    monitoredNodes1.insert( er );
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        monitoredNodes1.insert( thetas[i] );
    }
    monitoredNodes1.insert( rf );
    monitoredNodes1.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestUCLNRelaxedClockBHT92ModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestUCLNRelaxedClockBHT92Model.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(qs[0]);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished GTR model test." << std::endl;
    
    return true;
}
Ejemplo n.º 10
0
bool TestACLNRatesGen::run( void ) {

//    alignmentFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_GTRG.nex";
//    treeFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_true_relx.tre";
	
	std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    std::cout << data[0] << std::endl;
	
	// First, we read in the data 
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
	// #######################################
    // ###### birth-death process priors #####
	// #######################################
	
	//   Constant nodes
	ConstantNode<double> *dLambda = new ConstantNode<double>("div_rate", new double(1.0 / 5.0));		// Exponential rate for prior on div
	ConstantNode<double> *turnA   = new ConstantNode<double>("turn_alpha", new double(2.0));			// Beta distribution alpha
	ConstantNode<double> *turnB   = new ConstantNode<double>("turn_beta", new double(2.0));				// Beta distribution beta
    ConstantNode<double> *rho     = new ConstantNode<double>("rho", new double(1.0));					// assume 100% sampling for now
	ConstantNode<double> *meanOT  = new ConstantNode<double>("meanOT", new double(trees[0]->getRoot().getAge()*1.5));
	ConstantNode<double> *stdOT   = new ConstantNode<double>("stdOT", new double(10.0));
	
	//   Stochastic nodes
    StochasticNode<double> *origin  = new StochasticNode<double>( "origin", new NormalDistribution(meanOT, stdOT) );
    StochasticNode<double> *div   = new StochasticNode<double>("diversification", new ExponentialDistribution(dLambda));
    StochasticNode<double> *turn  = new StochasticNode<double>("turnover", new BetaDistribution(turnA, turnB));
	
	//   Deterministic nodes
	//    birthRate = div / (1 - turn)
	DeterministicNode<double> *birthRate = new DeterministicNode<double>("birth_rate", new BirthRateConstBDStatistic(div, turn));
	//    deathRate = (div * turn) / ( 1 - turn)
	DeterministicNode<double> *deathRate = new DeterministicNode<double>("death_rate", new DeathRateConstBDStatistic(div, turn));
	// For some datasets with large root ages, if div>1.0 (or so), the probability is NaN
	RandomNumberGenerator* rng = GLOBAL_RNG;
	div->setValue(rng->uniform01() / 1.5);
	
	// Birth-death tree
    std::vector<std::string> names = data[0]->getTaxonNames();
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, birthRate, deathRate, rho, "uniform", "nTaxa", taxa, std::vector<Clade>()) );

    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
	
	
	// ##############################################
	// #### ACLN Model on Branch Rates #####
	// ##############################################
	
	size_t numBranches = 2 * data[0]->getNumberOfTaxa() - 2;
	size_t numNodes = numBranches + 1; // model rates at nodes
	
    ConstantNode<double> *a      = new ConstantNode<double>("a", new double(4.0) );
    ConstantNode<double> *b      = new ConstantNode<double>("b", new double(4.0) );
    ConstantNode<double> *anu    = new ConstantNode<double>("a_nu", new double(1.0) );
    ConstantNode<double> *bnu    = new ConstantNode<double>("b_nu", new double(8.0) );
	
	StochasticNode<double> *rootRate = new StochasticNode<double>("root.rate", new GammaDistribution(a, b));
	StochasticNode<double> *bmNu = new StochasticNode<double>("BM_var", new GammaDistribution(anu, bnu));
	
	size_t rootID = trees[0]->getRoot().getIndex();

	ConstantNode<double> *crInv  = new ConstantNode<double>("invCr", new double(1.0) );
	DeterministicNode<double> *scaleRate = new DeterministicNode<double>("scaleRate", new BinaryDivision<double, double, double>(crInv, treeHeight));

	StochasticNode< std::vector< double > > *nodeRates = new StochasticNode< std::vector< double > >( "NodeRates", new AutocorrelatedLognormalRateDistribution(tau, bmNu, rootRate, scaleRate) );
	
	std::cout << nodeRates->getValue().size() << std::endl;
	

	std::vector<const TypedDagNode<double> *> branchRates;
	for( size_t i=0; i<numBranches; i++){
		std::ostringstream brName;
        brName << "br(" << i << ")";
		DeterministicNode<double> *tmpBrRt = new DeterministicNode<double>(brName.str(), new RateOnBranchAve(nodeRates, tau, scaleRate, i));
		branchRates.push_back( tmpBrRt );
	}
    DeterministicNode< std::vector< double > >* brVector = new DeterministicNode< std::vector< double > >( "branchRates", new VectorFunction< double >( branchRates ) );
	
	// making a combined DagNode for a compound move
	std::vector<DagNode*> treeAndRates;
	treeAndRates.push_back( tau );
	treeAndRates.push_back(nodeRates);
	treeAndRates.push_back(rootRate);

	
	// ####################################
	
	
    // ###### GTR model priors ######
	//    Constant nodes
    ConstantNode<std::vector<double> > *bf   = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e    = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    //    Stochastic nodes
    StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) );
    StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) );
	
    DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
    std::cout << "Q:\t" << q->getValue() << std::endl;

	// ####### Gamma Rate Het. ######
	
	ConstantNode<double> *shapePr = new ConstantNode<double>("gammaShPr", new double(0.5));
	StochasticNode<double> *srAlpha = new StochasticNode<double>("siteRates.alpha", new ExponentialDistribution(shapePr));
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1Value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2Value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3Value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4Value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(srAlpha, srAlpha) ) );
    std::vector<const TypedDagNode<double>* > gammaRates = std::vector<const TypedDagNode<double>* >();
    gammaRates.push_back(q1Value);
    gammaRates.push_back(q2Value);
    gammaRates.push_back(q3Value);
    gammaRates.push_back(q4Value);
    DeterministicNode<std::vector<double> > *siteRates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gammaRates) );
    DeterministicNode<std::vector<double> > *siteRatesNormed = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(siteRates) );
    
	
	tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
	std::cout << " ** origin   " << origin->getValue() << std::endl;
	std::cout << " ** root age " << trees[0]->getRoot().getAge() << std::endl;
	
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
	phyloCTMC->setClockRate( brVector ); 
    phyloCTMC->setRateMatrix( q );
	phyloCTMC->setSiteRates( siteRatesNormed );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
	charactermodel->clamp( data[0] );
	
	std::cout << " diversification: " << div->getValue() << std::endl;
	std::cout << " turnover: " << turn->getValue() << std::endl;
	std::cout << " birth rate: " << birthRate->getValue() << std::endl;
	std::cout << " death rate: " << deathRate->getValue() << std::endl;
	
	/* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 1.0, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turn, 1.0), 1.0, true ) );
	//	moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
	//	moves.push_back( new NarrowExchange( tau, 10.0 ) );
	//	moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
	//	moves.push_back( new SubtreeScale( tau, 5.0 ) );
	//	moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
	moves.push_back( new RootTimeSlide( tau, 50.0, true, 10.0 ) );
	moves.push_back( new OriginTimeSlide( origin, tau, 50.0, true, 10.0 ) );
	moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
	moves.push_back( new SimplexMove( er, 450.0, 6, 0, true, 2.0, 0.5 ) );
	moves.push_back( new SimplexMove( pi, 250.0, 4, 0, true, 2.0, 0.5 ) ); 
	moves.push_back( new SimplexMove( er, 200.0, 1, 0, false, 0.5 ) );
	moves.push_back( new SimplexMove( pi, 100.0, 1, 0, false, 0.5 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(srAlpha, log(2.0)), 1, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(bmNu, 0.75), 4, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 0.5), 2, false ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 1.0), 2, false ) );
	moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 1.0, false, 8.0 * (double)numNodes) );
	moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 2.0, false, 8.0 * (double)numNodes) );
	moves.push_back( new RateAgeACLNMixingMove( treeAndRates, 0.02, false, 2.0 ) ); 
	
    // add some tree stats to monitor
	DeterministicNode<double> *meanNdRate = new DeterministicNode<double>("MeanNodeRate", new MeanVecContinuousValStatistic(nodeRates) );
	
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::vector<DagNode*> monitoredNodes;
	monitoredNodes.push_back( meanNdRate );
	monitoredNodes.push_back( treeHeight );
	monitoredNodes.push_back( origin );
	monitoredNodes.push_back( nodeRates );
	monitoredNodes.push_back( rootRate );
	monitoredNodes.push_back( bmNu );
	monitoredNodes.push_back( scaleRate );
	monitors.push_back( new ScreenMonitor( monitoredNodes, 10, "\t" ) );
	
	monitoredNodes.push_back( div );
	monitoredNodes.push_back( turn );
	monitoredNodes.push_back( birthRate );
	monitoredNodes.push_back( deathRate );
	monitoredNodes.push_back( pi );
    monitoredNodes.push_back( er );
    monitoredNodes.push_back( srAlpha );
	monitoredNodes.push_back( brVector );
	
	std::string logFN = "clock_test/test_rb_ACLN_6June_rn_3.log";
	monitors.push_back( new FileMonitor( monitoredNodes, 10, logFN, "\t" ) );
	
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
	
//	std::string treFN = "clock_test/test_rb_ACLN_6June_pr.tre";
//	monitors.push_back( new FileMonitor( monitoredNodes2, 10, treFN, "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(q);
	
	mcmcGenerations = 200000;

    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
	
	
	/* clean up */
	//	delete div;
	//	delete turn;
	//	delete rho;
	//	delete cp;
	//	delete branchRates;
	//	delete q;
	//	delete tau;
	delete charactermodel;
	//	delete a;
	//	delete birthRate;
	//	delete phyloCTMC;
	//	delete dLambda;
	
	
	monitors.clear();
	moves.clear();
	
    return true;
}
Ejemplo n.º 11
0
bool TestGtrGammaModel::run( void ) {
    
    /* First, we read in the data */
    // the matrix
    NclReader& reader = NclReader::getInstance();
    std::vector<AbstractCharacterData*> data = reader.readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("", new double(0.0)), new ConstantNode<double>("", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    // gtr model priors
    ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    
    std::cout << "bf:\t" << bf->getValue() << std::endl;
    std::cout << "e:\t" << e->getValue() << std::endl;
    
    // then the parameters
    StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) );
    StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) );
    
    //Rate heterogeneity
    ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) );
    ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) );
    
    alpha->setValue( new double(0.5) );
    std::cout << "alpha:\t" << alpha->getValue() << std::endl;
    
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q1_value = new StochasticNode<double>("q1_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q2_value = new StochasticNode<double>("q2_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q3_value = new StochasticNode<double>("q3_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q4_value = new StochasticNode<double>("q4_value", new GammaDistribution(alpha, alpha) );
    std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >();
    gamma_rates.push_back(q1_value);
    gamma_rates.push_back(q2_value);
    gamma_rates.push_back(q3_value);
    gamma_rates.push_back(q4_value);
    
    DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) );
    // currently unused
    // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) );

    DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) );
    
    pi->setValue( new std::vector<double>(4,1.0/4.0) );
    er->setValue( new std::vector<double>(6,1.0/6.0) );
    
    std::cout << "pi:\t" << pi->getValue() << std::endl;
    std::cout << "er:\t" << er->getValue() << std::endl;
    std::cout << "rates:\t" << site_rates->getValue() << std::endl;
    std::cout << "rates:\t" << site_rates_norm->getValue() << std::endl;

    DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
    
    std::cout << "Q:\t" << q->getValue() << std::endl;
    
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
// (unused)    size_t numChar = data[0]->getNumberOfCharacters();
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setSiteRates( site_rates_norm );
    phyloCTMC->setRateMatrix( q );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
    charactermodel->clamp( static_cast<DiscreteCharacterData<DnaState> *>( data[0] ) );
    
    std::cout << "LnL:\t\t" << charactermodel->getLnProbability() << std::endl;
    
    /* add the moves */
    RbVector<Move> moves;
//    moves.push_back( new ScaleMove(div, 1.0, true, 2.0) );
//    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
//    moves.push_back( new NarrowExchange( tau, 10.0 ) );
//    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
//    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
//    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
//    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( pi, 10.0, 1, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( pi, 100.0, 4, 0, true, 2.0 ) );
    
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 1, true) );
//    moves.push_back( new ScaleMove(q1_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q2_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q3_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q4_value, 1.0, true, 2.0) );
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
//    monitoredNodes.insert( er );
//    monitoredNodes.insert( pi );
//    monitoredNodes.insert( q );
//    monitoredNodes.insert( q1_value );
//    monitoredNodes.insert( q2_value );
//    monitoredNodes.insert( q3_value );
//    monitoredNodes.insert( q4_value );
    monitoredNodes.insert( site_rates_norm );
    monitoredNodes.insert( alpha );
    monitoredNodes.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes, 1000, "TestGtrGammaModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1000, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 1000, "TestGtrGammaModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(q);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished GTR+Gamma model test." << std::endl;
    
    return true;
}
bool TestFilteredStandardLikelihood::run( void ) {
    std::cerr << "  starting TestFilteredStandardLikelihood...\n" ;
    /* First, we read in the data */
    // the matrix
    NclReader reader = NclReader();
    std::vector<AbstractCharacterData*> data = reader.readMatrices(alignmentFilename);
    AbstractDiscreteCharacterData * discrD = dynamic_cast<AbstractDiscreteCharacterData *>(data[0]);
#   if defined(USE_TIME_TREE)
        std::vector<TimeTree*> trees = reader.readTimeTrees( treeFilename );
        ConstantNode<TimeTree> *tau = new ConstantNode<TimeTree>( "tau", new TimeTree( *trees[0] ) );
#   else
        std::vector<BranchLengthTree*> *trees = reader.readBranchLengthTrees( treeFilename );
        ConstantNode<BranchLengthTree> *tau = new ConstantNode<BranchLengthTree>( "tau", new BranchLengthTree( *(*trees)[0] ) );
#   endif
    std::cout << "tau:\t" << tau->getValue() << std::endl;
#   if defined(USE_3_STATES)
        const size_t numStates = 3;
#   else
        const size_t numStates = 4;
#   endif
    size_t numChar = discrD->getNumberOfCharacters();

#   if defined(USE_RATE_HET)
        ConstantNode<double>* shape = new ConstantNode<double>("alpha", new double(0.5) );
        ConstantNode<double>* rate = new ConstantNode<double>("", new double(0.5) );
        ConstantNode<int>* numCats = new ConstantNode<int>("ncat", new int(4) );

        DiscretizeGammaFunction *dFunc = new DiscretizeGammaFunction( shape, rate, numCats, false );
        DeterministicNode<RbVector<double> > *site_rates_norm_2 = new DeterministicNode<RbVector<double> >( "site_rates_norm", dFunc );
        std::cout << "rates:\t" << site_rates_norm_2->getValue() << std::endl;
#   endif

#if defined(USE_3_STATES) && defined(USE_NUCLEOTIDE)
#error "cannot use 3 state and nucleotide type"
#endif
#if defined(USE_3_STATES) && defined(USE_GTR_RATE_MAT)
#error "cannot use 3 state and USE_GTR_RATE_MAT"
#endif

#   if defined(USE_GTR_RATE_MAT)
        ConstantNode<RbVector<double> > *pi = new ConstantNode<RbVector<double> >( "pi", new RbVector<double>(4, 1.0/4.0) );
        ConstantNode<RbVector<double> > *er = new ConstantNode<RbVector<double> >( "er", new RbVector<double>(6, 1.0/6.0) );
        DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
        std::cout << "Q:\t" << q->getValue() << std::endl;
#       if defined (USE_NUCLEOTIDE)
#           if defined(USE_TIME_TREE)
                PhyloCTMCSiteHomogeneousNucleotide<DnaState, TimeTree> *charModel = new PhyloCTMCSiteHomogeneousNucleotide<DnaState, TimeTree>(tau, false, numChar);
#           else
                PhyloCTMCSiteHomogeneousNucleotide<DnaState, BranchLengthTree> *charModel = new PhyloCTMCSiteHomogeneousNucleotide<DnaState, BranchLengthTree>(tau, false, numChar );
#           endif
#       else
#           if defined(USE_TIME_TREE)
                PhyloCTMCSiteHomogeneous<DnaState, TimeTree> *charModel = new PhyloCTMCSiteHomogeneous<DnaState, TimeTree>(tau, 4, false, numChar);
#           else
                PhyloCTMCSiteHomogeneous<DnaState, BranchLengthTree> *charModel = new PhyloCTMCSiteHomogeneous<DnaState, BranchLengthTree>(tau, 4, false, numChar );
#           endif
#       endif
#   else
        DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new JcRateMatrixFunction(numStates));
#       if defined (USE_NUCLEOTIDE)
#          if defined(USE_TIME_TREE)
                PhyloCTMCSiteHomogeneousNucleotide<StandardState, TimeTree> *charModel = new PhyloCTMCSiteHomogeneousNucleotide<StandardState, TimeTree>(tau, false, numChar);
#           else
                PhyloCTMCSiteHomogeneousNucleotide<StandardState, BranchLengthTree> *charModel = new PhyloCTMCSiteHomogeneousNucleotide<StandardState, BranchLengthTree>(tau, false, numChar );
#           endif
#       else
#          if defined(USE_TIME_TREE)
                PhyloCTMCSiteHomogeneous<StandardState, TimeTree> *charModel = new PhyloCTMCSiteHomogeneous<StandardState, TimeTree>(tau, numStates, false, numChar);
#           else
                PhyloCTMCSiteHomogeneous<StandardState, BranchLengthTree> *charModel = new PhyloCTMCSiteHomogeneous<StandardState, BranchLengthTree>(tau, numStates, false, numChar );
#           endif
#       endif
#   endif
#   if defined(USE_RATE_HET)
        charModel->setSiteRates( site_rates_norm_2 );
#   endif
    charModel->setRateMatrix( q );
    StochasticNode< AbstractDiscreteCharacterData > *charactermodel = new StochasticNode< AbstractDiscreteCharacterData >("S", charModel);
    charactermodel->clamp( discrD );
    double lnp = charactermodel->getLnProbability();
    std::cerr << "    lnProb = " << lnp << std::endl;
#   if defined(USE_3_STATES)
#       if defined(USE_RATE_HET)
            const double paupLnL = lnp; // can't check this against paup....
#       else
            const double paupLnL = -813.23060;
#       endif
#   else
#       if defined(USE_RATE_HET)
            const double paupLnL = -900.9122;
#       else
            const double paupLnL = -892.5822;
#       endif
#   endif
    const double tol = 0.01;
    if (fabs(lnp - paupLnL) > tol) {
        std::cerr << "    deviates too much from the likelihood from PAUP* of " << paupLnL << std::endl;
        return false;
    }
    if (lnp >= 0.0) {
        std::cerr << "    lnProb is too high!" << std::endl;
        return false;
    }

    std::cout << "RevBayes LnL:\t\t" << charactermodel->getLnProbability() << std::endl;
    std::cout << "Finished GTR+Gamma model test." << std::endl;
    return true;
}