Ejemplo n.º 1
0
bool TestScalingMove::run( void ) {
    
    /* set up the model graph */
    
    // first the priors on mu 
    ConstantNode<double> *a = new ConstantNode<double>( "a", new double(0.10) );
    // then x
    StochasticNode<double> *x = new StochasticNode<double>( "x", new ExponentialDistribution(a) );
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(x, 1.0), 1, true ) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    monitors.push_back( new FileMonitor( x, 1, "ScalingMoveTest.log", "\t" ) );
    
    /* instantiate the model */
    Model myModel = Model(a);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    
    /* clean up */
    delete x;
    delete a;
    
    return true;
}
Ejemplo n.º 2
0
/**
 * Member-method dispatch returning a matrix of transition probabilities.
 *
 * Reads three real-valued arguments (args[0] = rate, args[1] = start age,
 * args[2] = end age), computes the transition probabilities for them and
 * copies the num_states x num_states result row by row into rv.
 * The method name n is not inspected.
 *
 * \param n     Name of the requested method (unused here).
 * \param args  Argument nodes; the first three are read as TypedDagNode<double>.
 * \param rv    Output; cleared first, then filled with one row per state.
 */
void RateGenerator::executeMethod(const std::string &n, const std::vector<const DagNode*> &args, RbVector<RbVector<double> >& rv) const
{
    // discard whatever the caller left in the output
    rv.clear();

    TransitionProbabilityMatrix tp_matrix(num_states);

    // unpack the arguments in positional order
    const TypedDagNode<double> *rate_arg  = static_cast<const TypedDagNode<double> *>( args[0] );
    double rate = rate_arg->getValue();
    const TypedDagNode<double> *start_arg = static_cast<const TypedDagNode<double> *>( args[1] );
    double start_age = start_arg->getValue();
    const TypedDagNode<double> *end_arg   = static_cast<const TypedDagNode<double> *>( args[2] );
    double end_age = end_arg->getValue();

    calculateTransitionProbabilities( start_age, end_age, rate, tp_matrix);

    // copy the matrix into the nested vector, one row at a time
    size_t row = 0;
    while ( row < num_states )
    {
        RbVector<double> row_values;
        for (size_t col = 0; col < num_states; ++col)
        {
            row_values.push_back( tp_matrix[row][col] );
        }
        rv.push_back(row_values);
        ++row;
    }
}
Ejemplo n.º 3
0
/**
 * Run one of the stored analyses for a further gen iterations.
 *
 * When process_active is set, first prints a right-aligned progress line
 * of the form "Sim <idx+1> / <num_runs>" to standard output.
 *
 * \param idx  Index into runs of the analysis to execute.
 * \param gen  Number of additional generations to run.
 */
void ValidationAnalysis::runSim(size_t idx, size_t gen)
{
    if ( process_active )
    {
        // width of the largest run number, used to pad the current one
        size_t width = size_t( ceil( log10( num_runs ) ) );
        std::cout << "Sim ";

        // the 1.1 offset makes exact powers of ten round up to the next width
        size_t used = size_t( ceil( log10( idx+1.1 ) ) );
        while ( used < width )
        {
            std::cout << " ";
            used++;
        }

        std::cout << (idx+1) << " / " << num_runs;
        std::cout << "\t\t";

        std::cout << std::endl;
    }

    // the analysis this call is responsible for
    MonteCarloAnalysis *analysis = runs[idx];

    // stop once gen more iterations have been added to the current count
    RbVector<StoppingRule> rules;
    size_t generations_so_far = analysis->getCurrentGeneration();
    rules.push_back( MaxIterationStoppingRule(gen + generations_so_far) );

    analysis->run(gen, rules, false);
}
Ejemplo n.º 4
0
/**
 * Member-method dispatch returning one row of rates.
 *
 * Reads args[0] as an integer state index, subtracts one from it, and
 * fills rv with getRate(from, to, 0.0, 1.0) for every target state.
 * The method name n is not inspected.
 *
 * \param n     Name of the requested method (unused here).
 * \param args  Argument nodes; args[0] is read as TypedDagNode<int>.
 * \param rv    Output; cleared first, then one entry per state is appended.
 */
void RateGenerator::executeMethod(const std::string &n, const std::vector<const DagNode*> &args, RbVector<double> &rv) const
{
    size_t state_count = this->getNumberOfStates();
    rv.clear();

    // shift the supplied index down by one before using it as the source state
    const TypedDagNode<int> *from_arg = static_cast<const TypedDagNode<int> *>( args[0] );
    size_t source_state = from_arg->getValue()-1;

    size_t target_state = 0;
    while ( target_state < state_count )
    {
        rv.push_back(this->getRate(source_state, target_state, 0.0, 1.0));
        ++target_state;
    }
}
Ejemplo n.º 5
0
/**
 * Perform the proposal.
 *
 * An element swap simplex proposal simply selects two distinct random
 * elements of a simplex and swaps them. The current value is copied into
 * storedValue before anything is modified.
 *
 * A simplex with fewer than two elements has no pair of distinct indices to
 * swap; in that case the value is left untouched and the proposal is a no-op
 * (previously the index rejection loop below would never terminate).
 *
 * \param value  The simplex to modify in place.
 * \return The log Hastings ratio, which is always 0 for this proposal.
 */
double ElementSwapSimplexProposal::propose( RbVector<double> &value )
{
    
    // Get random number generator
    RandomNumberGenerator* rng     = GLOBAL_RNG;
    
    // store the value
    storedValue = value;
    
    // we need to know the number of categories
    size_t cats = value.size();
    
    // this function always reports a log Hastings ratio of zero
    double ln_hastings_ratio = 0;
    
    // nothing can be swapped unless there are at least two elements
    if ( cats < 2 )
    {
        return ln_hastings_ratio;
    }
    
    // randomly draw two indices, redrawing the second until it differs from the first
    size_t chosen_index_1 = size_t( floor(rng->uniform01()*double(cats)) );
    size_t chosen_index_2 = size_t( floor(rng->uniform01()*double(cats)) );
    while (chosen_index_1 == chosen_index_2)
    {
        chosen_index_2 = size_t( floor(rng->uniform01()*double(cats)) );
    }
   
    // swap the values
    double value_1 = value[chosen_index_1];
    double value_2 = value[chosen_index_2];
    value[chosen_index_1] = value_2;
    value[chosen_index_2] = value_1;
    
    return ln_hastings_ratio;
}
Ejemplo n.º 6
0
/**
 * Run the given sampler for a further gen iterations.
 *
 * A single MaxIterationStoppingRule is installed at (gen + current
 * generation) and the sampler's run method is called with it.
 *
 * NOTE(review): the #endif directly under the signature closes a
 * preprocessor conditional that opens above this excerpt. The RB_MPI branch
 * in the body passes a variable `c` that this signature does not declare, so
 * presumably an alternative RB_MPI signature providing it precedes this one;
 * confirm against the full file.
 *
 * \param sampler  The analysis to run.
 * \param gen      Number of additional generations to run.
 */
void PosteriorPredictiveAnalysis::runSim(MonteCarloAnalysis *sampler, size_t gen)
#endif
{

    // run the analysis
    RbVector<StoppingRule> rules;

    // the stopping point is expressed relative to the generations already done
    size_t currentGen = sampler->getCurrentGeneration();
    rules.push_back( MaxIterationStoppingRule(gen + currentGen) );

    // the RB_MPI build passes one extra argument (c) to run()
#ifdef RB_MPI
    sampler->run(gen, rules, c, false);
#else
    sampler->run(gen, rules, false);
#endif

}
Ejemplo n.º 7
0
/**
 * Member-method dispatch for integer-vector results.
 *
 * The only supported method is "numberEvents", which writes the number of
 * events recorded on each branch history into rv.
 *
 * \param n     Name of the requested method.
 * \param args  Argument nodes (not read by this overload).
 * \param rv    Output; resized to the number of branches and overwritten.
 * \throws RbException if n is not "numberEvents".
 */
void HeterogeneousRateBirthDeath::executeMethod(const std::string &n, const std::vector<const DagNode *> &args, RbVector<int> &rv) const
{
    // reject anything but the one method this overload knows about
    if ( n != "numberEvents" )
    {
        throw RbException("The heterogeneous rate birth-death process does not have a member method called '" + n + "'.");
    }

    const size_t branch_count = branch_histories.getNumberBranches();
    rv.clear();
    rv.resize( branch_count );

    // one event count per branch
    for (size_t branch = 0; branch != branch_count; branch++)
    {
        rv[branch] = int(branch_histories[branch].getNumberEvents());
    }
}
Ejemplo n.º 8
0
bool TestSimplexMove::run( void ) {
    
    /* set up the model graph */
    
    // first the priors on mu 
    ConstantNode<std::vector<double> > *a = new ConstantNode<std::vector<double> >( "a", new std::vector<double>(4,50) );
    // then x
    StochasticNode<std::vector<double> > *x = new StochasticNode<std::vector<double> >( "x", new DirichletDistribution(a) );
    
    std::vector<double> *x_val = new std::vector<double>();
    x_val->push_back(0.01);
    x_val->push_back(0.02);
    x_val->push_back(0.02);
    x_val->push_back(0.95);
    x->setValue( x_val );
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new SimplexMove( x, 100.0, 4, 0, false, 1.0 ) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    monitors.push_back( new FileMonitor( x, 1, "SimplexMoveTest.log", "\t" ) );
    
    /* instantiate the model */
    Model myModel = Model(a);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    
    /* clean up */
    delete x;
    delete a;
    
    return true;
}
Ejemplo n.º 9
0
/**
 * Construct the stitch-tree function.
 *
 * Stores the backbone tree node, the patch-clade node and a copy of the
 * patch taxa, registers both nodes as parameters, initialises the taxon
 * groups and computes the initial value through update().
 *
 * \param bt  Node holding the backbone tree.
 * \param pc  Node holding the vector of patch clades.
 * \param pt  Taxa of each patch; its size is recorded as numPatches.
 */
StitchTreeFunction::StitchTreeFunction(const TypedDagNode<Tree> *bt, const TypedDagNode<RbVector<Tree> >* pc, RbVector<RbVector<Taxon> > pt) : TypedFunction<Tree>( new Tree() ),
backboneTree( bt ),
patchClades( pc ),
patchTaxa( pt ),
numPatches( pt.size() ),
haveIndex(false)
{
    // register the backbone tree and the patch clades as parameters
    addParameter( bt );
    addParameter( pc );
    
    initTaxonGroups();
    
    // The base class was already handed a freshly allocated Tree in the
    // initialiser list. Release what `value` currently holds before replacing
    // it; overwriting the pointer directly would leak that first allocation.
    // NOTE(review): assumes `value` is the owning pointer set by the
    // TypedFunction<Tree> constructor -- confirm against the base class.
    delete value;
    value = new Tree();
    
    update();
}
Ejemplo n.º 10
0
/**
 * Run the admixture-graph test.
 *
 * In order, this function:
 *   1. echoes the command-line tokens and, when more than one is present,
 *      takes the second one as the SNP file name;
 *   2. reads the SNP data and, if treeFilename is non-empty, builds a
 *      starting tree from a hard-coded Newick string;
 *   3. builds the model graph (diffusion rate, admixture rate and count,
 *      birth-death tree prior, per-branch rate multipliers, the Brownian
 *      motion admixture-graph node and its residuals);
 *   4. registers the moves selected by the local boolean switches;
 *   5. registers file and screen monitors;
 *   6. runs either Mcmc or ParallelMcmcmc, depending on useParallelMcmcmc;
 *   7. deletes the nodes it allocated.
 *
 * Names such as argc, argv, argTokens, snpFilename, snpFilepath,
 * treeFilename and mcmcGenerations are not declared in this function;
 * presumably they are members of TestAdmixtureGraph.
 *
 * NOTE(review): the monitor output paths are hard-coded to
 * "/Users/mlandis/data/admix/output/".
 * NOTE(review): the objects referenced by `trees`, `blt` and `bmag` are not
 * deleted in this function.
 *
 * \return Always true.
 */
bool TestAdmixtureGraph::run(void) {
    
    std::cout << "Running TestAdmixtureGraph\n";
    
    // echo the arguments; argv[1], when present, replaces snpFilename
    std::cout << "argc: " << argc << "\n";
    if (argc > 1)
    {
        for (int i = 0; i < argc; i++)
        {
            argTokens.push_back(argv[i]);
            std::cout << i << " " << argTokens[i] << "\n";
        }
        std::cout << argc << " == " << argTokens.size() << " arguments\n";
        snpFilename = argTokens[1];
    }
    
    // MODEL GRAPH
    // the seed is only printed here; the line that would reset it is commented out
    std::vector<unsigned int> seed;
    seed = GLOBAL_RNG->getSeed();
    std::cout << "seed " << seed[0] << " " << seed[1] << "\n";
    //seed.clear(); seed.push_back(53866); seed.push_back(21201); GLOBAL_RNG->setSeed(seed);
    // 53866 21201
    
    // read in data
    std::string fn = snpFilepath + snpFilename;
    int snpThinBy = 100;
    SnpData* snps = PopulationDataReader().readSnpData(fn,snpThinBy);
    
    // read in tree
    std::vector<AdmixtureTree*> trees;
    bool startTree = false;
    //treeFilename = "";
    // NOTE(review): treeFilename only acts as an on/off switch here; the file itself is not read
    if (treeFilename != "")
    {
        // NclReader does not seem to work for Newick strings at this time
        /*
        trees = NclReader::getInstance().readAdmixtureTrees( snpFilepath + treeFilename, "newick" );
        std::cout << "Read " << trees.size() << " trees." << std::endl;
        std::cout << trees[0]->getNewickRepresentation() << std::endl;
         */
        
        // hacky workaround for now...
        //std::string newickStr = "(San:1,((Han:0.348958,Dai:0.348958):0.194964,(((Ket:0.351687,(Koryak:0.344761,(SiberianEskimo:0.325112,(((Huichol:0.269256,(Pima:0.233362,((Karitiana:0.104362,Aymara:0.104362):0.0120051,(Yukpa:0.100877,Mayan:0.100877):0.0154902):0.116995):0.0358943):0.0341607,Athabascan:0.303417):0.0128117,((EastGreenland:0.158699,WestGreenland:0.158699):0.0369703,Aleuts:0.195669):0.120559):0.00888335):0.0196487):0.0069259):0.0987521,((Nivhks:0.278599,Buryat:0.278599):0.0642882,Yakut:0.342887):0.107551):0.0756044,Altai:0.526043):0.0178791):0.456078)";
        //std::string newickStr = "((A:.5,B:.5):.5,C:.5)";
        std::string newickStr = "(San:1,((Koryak:0.926938,(SiberianEskimo:0.812519,((Aleuts:0.762302,(EastGreenland:0.4824,WestGreenland:0.4824):0.279902):0.0144746,((Huichol:0.353522,(Pima:0.324226,((Mayan:0.223067,Yukpa:0.223067):0.0856916,(Aymara:0.173581,Karitiana:0.173581):0.135178):0.015467):0.0292956):0.109212,Athabascan:0.462734):0.314043):0.0357419):0.114419):0.0600046,(Ket:0.772384,((Altai:0.640188,(Han:0.481097,Dai:0.481097):0.159091):0.0445788,((Buryat:0.596074,Nivhks:0.596074):0.015206,Yakut:0.61128):0.0734876):0.0876171):0.214558):0.0130575);";
        NewickConverter nc;
        BranchLengthTree* blt = nc.convertFromNewick(newickStr);
        AdmixtureTree* at = TreeUtilities::convertToAdmixtureTree(*blt, snps->getPopulationNames());
        at->setNames(snps->getPopulationNames());
        at->updateTipOrderByNames(snps->getPopulationNames());
        trees.push_back(at);
        startTree = true;
    }
    
    // sizes used below for a tree with numTaxa tips
    size_t numTaxa = snps->getNumPopulations();
    size_t numNodes = 2 * numTaxa - 1;
    size_t numBranches = numNodes - 1;
    //size_t numSites = snps->getNumSnps();
    int blockSize = 5000;
    
    double divGens = 1;//.01;
    int delay = 1000;
    int numTreeResults = 500;
    int numAdmixtureResults = 500;
    int maxNumberOfAdmixtureEvents = 1;
    double residualWeight = 2.0;
    
    bool useWishart = true;             // if false, the composite likelihood function is used
    bool useBias = true;               // if false, no covariance bias correction for small sample size is used
    bool useAdmixtureEdges = true;      // if false, no admixture moves or edges are used
    bool useBranchRates = true;         // if false, all populations are of the same size
    bool allowSisterAdmixture = true;   // if false, admixture events cannot be between internal lineages who share a divergence parent
    bool discardNonPosDefMtx = true;    // if false, round negative eigenvalues to positive eps
    bool useContrasts = false;          // nothing really, need to remove
    bool updateParameters = true;
    bool updateTopology = true;
    bool updateNodeAges = true;
    
    bool useParallelMcmcmc = true;
    int numChains = 4;
    int numProcesses = numChains;
//    numProcesses=80;
    int swapInterval = 1;
    double deltaTemp = .1;
    double sigmaTemp = 1.0;
    double hottestTemp = 0.001;
    // NOTE(review): the condition is a constant false, so this block never executes
    if (!true)
    {
        deltaTemp = exp(-log(hottestTemp)/pow(numChains-1,sigmaTemp)) - 1;
        std::cout << deltaTemp << "\n";
    }
    
    double startingHeat = 1.0;
    double likelihoodScaler = 1.0;

    // nine-digit, zero-padded random tag that makes the output file names unique per run
    std::stringstream rndStr;
    rndStr << std::setw(9) << std::fixed << std::setprecision(0) << std::setfill('0') << std::floor(GLOBAL_RNG->uniform01()*1e9);
    // std::string outName = "papa." + rndStr.str();
    std::string simName = "hgdp";
    std::string outName = simName + "." + rndStr.str();
//    std::string outName = simName + "." + rndStr.str() + "." + snpFilename;
    
    // BM diffusion rate
    // NOTE(review): a_bm is not used by any active statement below other than its delete
    ConstantNode<double>* a_bm = new ConstantNode<double>( "bm_a", new double(3));
    ConstantNode<double>* b_bm = new ConstantNode<double>( "bm_b", new double(100));
    //ConstantNode<double>* c_bm = new ConstantNode<double>( "bm_c", new double(0));
    //ConstantNode<double>* d_bm = new ConstantNode<double>( "bm_d", new double(100));
    StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new ExponentialDistribution(b_bm));
    //StochasticNode<double>* diffusionRate = new StochasticNode<double> ("rate_BM", new UniformDistribution(c_bm, d_bm));

    // CPP rate
    // MJL 071713:  Flat Poisson prior cannot overpower model overfitting when lnL is large.
    //              Consider implementing Conway-Maxewell-Poisson distn instead.
    
    // This prior requires admixture events to improve lnL by N units
    // Negative values -> admixture rare
    // Positive values -> admixture common (set admixture cap)
    double adm_th_lnL = 10;
    double rate_cpp_prior = exp(adm_th_lnL);
    
    ConstantNode<double>* c = new ConstantNode<double>( "c", new double(1.0/rate_cpp_prior)); // admixture rate prior
    StochasticNode<double>* admixtureRate = new StochasticNode<double> ("rate_CPP", new ExponentialDistribution(c));
    StochasticNode<int>* admixtureCount = new StochasticNode<int> ("count_CPP", new PoissonDistribution(admixtureRate));
    // NOTE(review): both nodes are clamped unconditionally here, so the conditional
    // block below only repeats the same two clamps with the same values
    admixtureCount->clamp(new int(0));
    admixtureRate->clamp(new double(rate_cpp_prior));
    if (!useAdmixtureEdges)
    {
        admixtureRate->clamp(new double(rate_cpp_prior));
        admixtureCount->clamp(new int(0));
    }
    
    
    // birth-death process for ultrametric tree
    StochasticNode<double>* diversificationRate = new StochasticNode<double>("div", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(50.0)) ));
    StochasticNode<double>* turnover = new StochasticNode<double>("turnover", new UniformDistribution(new ConstantNode<double>("do_lower", new double(0.0)), new ConstantNode<double>("do_upper", new double(1.0)) ));
    
    // tree node
    StochasticNode<AdmixtureTree>* tau = new StochasticNode<AdmixtureTree>( "tau", new AdmixtureConstantBirthDeathProcess(diversificationRate, turnover, (int)numTaxa, snps->getPopulationNames(), snps->getOutgroup()) );
    if (startTree)
    {
        // start from a copy of the tree built from the Newick string above
        tau->setValue(new AdmixtureTree(*trees[0]));
        tau->setIgnoreRedraw(true);
    }
    
    // branch multipliers (mutation rate is clocklike, but population sizes are not)
    // the same nodes are kept in two vectors: const pointers for the vector function,
    // non-const pointers for the moves and monitors
	std::vector<const TypedDagNode<double> *> branchRates;
    std::vector< ContinuousStochasticNode *> branchRates_nonConst;
    // NOTE(review): only branchRateB is used by the active distribution below;
    // A, C and D appear only in commented-out alternatives and in the cleanup
    ConstantNode<double>* branchRateA = new ConstantNode<double>( "branchRateA", new double(1));
    ConstantNode<double>* branchRateB = new ConstantNode<double>( "branchRateB", new double(2));
    ConstantNode<double>* branchRateC = new ConstantNode<double>( "branchRateC", new double(0));
    ConstantNode<double>* branchRateD = new ConstantNode<double>( "branchRateD", new double(.01));
    //ConstantNode<double>* branchRateE = new ConstantNode<double>( "branchRateE", new double(10));
	for( size_t i=0; i<numBranches; i++){

        std::ostringstream br_name;
        br_name << "br_" << i;
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new ExponentialDistribution(branchRateD) );
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new InverseGammaDistribution(branchRateA, branchRateB));
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new LognormalDistribution(branchRateC, branchRateA));
        //ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode(br_name.str(), new UniformDistribution(branchRateC, branchRateE));
//        ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateD));
        ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new GammaDistribution(branchRateB, branchRateB));
        
		if (!useBranchRates)
        {
            tmp_branch_rate->clamp(new double(1.0));
        }
        
        branchRates.push_back( tmp_branch_rate );
        branchRates_nonConst.push_back( tmp_branch_rate );
        
    }
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );

    
    // model node
    BrownianMotionAdmixtureGraph* bmag = new BrownianMotionAdmixtureGraph( tau, diffusionRate, admixtureRate, br_vector, snps, useWishart, useContrasts, useBias, discardNonPosDefMtx, blockSize, likelihoodScaler );
    StochasticNode<ContinuousCharacterData >* admixtureModel;
    admixtureModel = new StochasticNode<ContinuousCharacterData >("AdmixtureGraph", bmag);
    
    // have to clamp to distinguish likelihood from prior (incidentally calls setValue(), but this is handled otherwise)
    admixtureModel->clamp( new ContinuousCharacterData() ); // does it event matter how it's clamped?
    
    // residuals
    DeterministicNode<std::vector<double> >* residuals = new DeterministicNode<std::vector<double> >("residuals", new BrownianMotionAdmixtureGraphResiduals(admixtureModel));
    
    // MOVES
    std::cout << "Adding moves\n";
        
    // moves vector
    RbVector<Move> moves;

    // model parameters
    if (updateParameters)
    {
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diffusionRate, 0.1), 5, false ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(diversificationRate, 0.5), 5, false ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turnover, 0.5), 5, false ) );
    }
    
    
    // non-admixture tree updates
    if (updateTopology)
    {
        moves.push_back( new AdmixtureNarrowExchange( tau, 0.1, numTaxa/2) );
        moves.push_back( new AdmixtureSubtreePruneRegraft( tau, 0.1, numTaxa/4) );
        moves.push_back( new AdmixtureFixedNodeheightPruneRegraft(tau, numTaxa/4));
        
        moves.push_back( new AdmixtureEdgeReplaceNNI( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
        moves.push_back( new AdmixtureEdgeReplaceFNPR( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
        moves.push_back( new AdmixtureEdgeReplaceSubtreeRegraft( tau, residuals, residualWeight, delay, 0, allowSisterAdmixture, numTaxa));
    }

    if (updateNodeAges)
    {
        // two time-slide moves for every node index from numTaxa up to numNodes-2
        for (size_t i = numTaxa; i < numNodes - 1; i++)
        {
            moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 15.0, false, 1.0 ) );
            moves.push_back( new AdmixtureNodeTimeSlideBeta( tau, (int)i, 1.0, false, 0.5 ) );
        }
    }
    
    // branch rate updates
    if (useBranchRates)
    {
        // branch rate multipliers
        for( size_t i=0; i < numBranches; i++)
        {
            moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 0.1), 1, false ) );
            moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), .5, false ) );
        }
        
        // tree rate shift
        moves.push_back( new AdmixtureShiftTreeRates(diffusionRate, branchRates_nonConst, 0.5, false, 2.0));
        
        // shift node age for branch rate
        for (size_t i = numTaxa; i < numNodes - 1; i++)
        {
            if (updateNodeAges)
                moves.push_back( new AdmixtureShiftNodeAgeAndRate(tau, branchRates_nonConst, (int)i, 0.7, false, 1.0) );
            
            // MJL 081513: not working, I think...
            // NOTE(review): pvec is filled but never used, because the move that
            // would consume it is commented out
            if (updateTopology)
            {
                std::vector<DagNode*> pvec;
                pvec.push_back(tau);
                pvec.push_back(branchRates_nonConst[i]);
                //moves.push_back( new AdmixtureSubtreePruneRegraftAndRateShift(pvec, i, 0.5, 1.0) );
                // ... something wrong with how the lnProb is computed using the lnProb ratios...
            }
        }
        
        // NNI with branch rate modifier (not working quite right, disabled)
        // NOTE(review): despite the "disabled" remark above, this move is still added when updateTopology is true
        if (updateTopology)
            moves.push_back( new AdmixtureNearestNeighborInterchangeAndRateShift( tau, branchRates_nonConst, 0.1, false, numTaxa));
        
        
    }
        
    // admixture tree updates
    if (useAdmixtureEdges)
    {
    
        moves.push_back( new AdmixtureEdgeAddResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 10.0) );
        moves.push_back( new AdmixtureEdgeRemoveResidualWeights( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 10.0) );
        moves.push_back( new AdmixtureEdgeReplaceResidualWeights( tau, admixtureRate, branchRates_nonConst, residuals, residualWeight, delay, allowSisterAdmixture, 20.0) );
        //moves.push_back( new AdmixtureEdgeMultiRemove( tau, admixtureRate, admixtureCount, residuals, residualWeight, delay, 2.0 ) );
      
        //moves.push_back( new AdmixtureReplaceAndNNI(  tau, 0.5, 10.0) );
        //moves.push_back( new AdmixtureEdgeAddCladeResiduals( tau, admixtureRate, admixtureCount, residuals, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 2.0) );
        //moves.push_back( new AdmixtureEdgeReplaceCladeResiduals( tau, admixtureRate, branchRates_nonConst, residuals, delay, allowSisterAdmixture, 15.0) );
        
        if (updateTopology)
        {
            moves.push_back( new AdmixtureEdgeDivergenceMerge( tau, admixtureRate, branchRates_nonConst, admixtureCount, residuals, delay, allowSisterAdmixture, 5.0 ));
            moves.push_back( new AdmixtureEdgeRegraftReplace( tau, residuals, 1.0, delay, maxNumberOfAdmixtureEvents, allowSisterAdmixture, 5.0));
            ;
            
        }
        
        moves.push_back( new AdmixtureEdgeReweight( tau, delay, 10.0, 10.0) );
        moves.push_back( new AdmixtureEdgeReversePolarity( tau, delay, 2.0, 10.0) );
        moves.push_back( new AdmixtureEdgeSlide( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) );
        moves.push_back( new AdmixtureEdgeFNPR( tau, branchRates_nonConst, delay, allowSisterAdmixture, 10.0, 10.0) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(admixtureRate, 0.1), 5, false ) );

    }
    

    
    // MONITORS
    std::cout << "Adding monitors\n";
    RbVector<Monitor> monitors;
    
    // parameter monitor
    std::vector<DagNode*> monitoredNodes;
    monitoredNodes.push_back( diffusionRate );
    monitoredNodes.push_back( admixtureRate );
    monitoredNodes.push_back( diversificationRate );
    monitoredNodes.push_back( turnover );
    monitoredNodes.push_back( admixtureCount );
    
    if (useBranchRates)
    {
        for( size_t i=0; i<numBranches; i++){
            monitoredNodes.push_back( branchRates_nonConst[i] );
        }
    }
    
    // NOTE(review): every file monitor below writes into a hard-coded, user-specific directory
    monitors.push_back( new FileMonitor( monitoredNodes, 1, "/Users/mlandis/data/admix/output/" + outName + ".parameters.txt", "\t", true, true, true, useParallelMcmcmc, useParallelMcmcmc, useParallelMcmcmc ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) );
 
    monitors.push_back( new AdmixtureBipartitionMonitor(tau, diffusionRate, br_vector, numTreeResults, numAdmixtureResults, 1, "/Users/mlandis/data/admix/output/" + outName + ".bipartitions.txt", "\t", true, true, true, true, true, true ) );
    monitors.push_back( new AdmixtureResidualsMonitor(residuals, snps->getPopulationNames(), 10, "/Users/mlandis/data/admix/output/" + outName + ".residuals.txt", "\t", true, true, true, true ) );

    //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, true, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".admixture_trees.txt", "\t", true, true, true, true ) );
    //monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, true, 10, "/Users/mlandis/data/admix/output/" + outName + ".topology_trees.trees", "\t", true, true, true, true ) );
    monitors.push_back( new ExtendedNewickAdmixtureTreeMonitor( tau, br_vector, false, false, 10, "/Users/mlandis/data/admix/output/" + outName + ".time_trees.trees", "\t", true, true, true, true ) );
    
    
    
    
    // MODEL
    std::cout << "Calling model\n";
    // the model is constructed from a set containing only admixtureRate
    std::set<const DagNode*> mset;
    mset.insert(admixtureRate);
    Model myModel = Model(mset);
    
    
    // MCMC
    std::cout << "Calling mcmc\n";
    if (!useParallelMcmcmc)
    {
        Mcmc myMcmc = Mcmc(myModel, moves, monitors);
        myMcmc.run(mcmcGenerations);
        myMcmc.printOperatorSummary();
    }
    else
    {
        ParallelMcmcmc myPmc3(myModel, moves, monitors, "random", numChains, numProcesses, swapInterval, deltaTemp, sigmaTemp, startingHeat);
        // divGens is a double, so the generation count passed here is the result of a floating-point division
        myPmc3.run(mcmcGenerations/divGens);
        myPmc3.printOperatorSummary();
    }

    std::cout << "All done!\n";
    
    
    // OBJECT CLEANUP
    delete snps;
    delete a_bm;
    delete b_bm;
    delete c;
    delete tau;
    delete diversificationRate;
    delete turnover;
    delete diffusionRate;
    delete admixtureRate;
    delete admixtureCount;
    delete branchRateA;
    delete branchRateB;
    delete branchRateC;
    delete branchRateD;
    // clear() only empties the two pointer vectors; the branch-rate nodes are not deleted individually here
    branchRates_nonConst.clear();
    branchRates.clear();
    delete br_vector;
    //delete bmag; // malloc deallocation error
    delete admixtureModel;
    delete residuals;
    
    moves.clear();
    monitors.clear();
    
    return true;
}
Ejemplo n.º 11
0
/**
 * Member-method dispatch for real-vector results.
 *
 * Supports "averageSpeciationRate" and "averageExtinctionRate". For each
 * branch the result is a weighted sum of per-category rates: each event in
 * the branch history contributes the rate of that event's state, weighted by
 * the fraction of the branch length between the previous event time and its
 * own; the remainder of the branch is weighted with the rate of the state
 * returned by computeStartIndex for the parent node.
 *
 * \param n     Name of the requested method.
 * \param args  Argument nodes (not read by this overload).
 * \param rv    Output; resized to the number of branches and overwritten.
 * \throws RbException if n is neither of the two supported names.
 */
void HeterogeneousRateBirthDeath::executeMethod(const std::string &n, const std::vector<const DagNode *> &args, RbVector<double> &rv) const
{
    typedef std::multiset<CharacterEvent*,CharacterEventCompare> EventSet;

    // work out which rate vector is wanted, rejecting unknown method names
    const bool want_speciation = ( n == "averageSpeciationRate" );
    if ( !want_speciation && n != "averageExtinctionRate" )
    {
        throw RbException("The heterogeneous rate birth-death process does not have a member method called '" + n + "'.");
    }

    const size_t branch_count = branch_histories.getNumberBranches();
    const RbVector<double> &category_rates = want_speciation ? speciation->getValue() : extinction->getValue();
    rv.clear();
    rv.resize( branch_count );

    for (size_t branch = 0; branch < branch_count; ++branch)
    {
        const TopologyNode &node = this->value->getNode( branch );
        const BranchHistory &branch_history = branch_histories[ branch ];
        const EventSet &events = branch_history.getHistory();
        size_t state_index_rootwards = computeStartIndex( node.getParent().getIndex() );

        double weighted_rate = 0;
        double previous_time = 0.0;
        double branch_length = node.getBranchLength();

        // accumulate the contribution of every event on this branch
        EventSet::const_iterator it = events.begin();
        while ( it != events.end() )
        {
            CharacterEvent* event = *it;
            double event_time = event->getTime();
            double fraction = (event_time - previous_time) / branch_length;

            // rate category attached to this event
            size_t event_state = event->getState();

            weighted_rate += fraction * category_rates[event_state];

            previous_time = event_time;
            ++it;
        }

        // remainder of the branch after the last event
        weighted_rate += (branch_length-previous_time)/branch_length * category_rates[state_index_rootwards];

        rv[branch] = weighted_rate;
    }
}
Ejemplo n.º 12
0
/**
 * Run an MCMC test of the autocorrelated lognormal (ACLN) node-rate model.
 *
 * The function reads the character matrices from alignmentFilename and the time trees from
 * treeFilename, builds a constant-rate birth-death tree prior, ACLN node rates that are turned
 * into per-branch rates, a GTR rate matrix and four normalized gamma-quantile site rates,
 * clamps the first matrix to the character model and runs the MCMC.
 *
 * Only data[0] and trees[0] are used, and both are indexed without checking that the readers
 * returned at least one element. The member mcmcGenerations is overwritten with 200000 before
 * the chain is run.
 *
 * \return Always true.
 */
bool TestACLNRatesGen::run( void ) {

//    alignmentFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_GTRG.nex";
//    treeFilename = "/Users/tracyh/Code/RevBayes_proj/tests/time_trees/tt_CLK_true_relx.tre";
	
	// First, we read in the character matrices
	std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    // NOTE(review): data[0] is a pointer, so this presumably streams the pointer rather than the matrix -- confirm intent
    std::cout << data[0] << std::endl;
	
	// then the time trees
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
	// #######################################
    // ###### birth-death process priors #####
	// #######################################
	
	//   Constant nodes
	ConstantNode<double> *dLambda = new ConstantNode<double>("div_rate", new double(1.0 / 5.0));		// Exponential rate for prior on div
	ConstantNode<double> *turnA   = new ConstantNode<double>("turn_alpha", new double(2.0));			// Beta distribution alpha
	ConstantNode<double> *turnB   = new ConstantNode<double>("turn_beta", new double(2.0));				// Beta distribution beta
    ConstantNode<double> *rho     = new ConstantNode<double>("rho", new double(1.0));					// assume 100% sampling for now
	// mean of the origin-time prior: 1.5 times the root age of the first input tree
	ConstantNode<double> *meanOT  = new ConstantNode<double>("meanOT", new double(trees[0]->getRoot().getAge()*1.5));
	// second parameter of the normal prior on the origin time
	ConstantNode<double> *stdOT   = new ConstantNode<double>("stdOT", new double(10.0));
	
	//   Stochastic nodes
    StochasticNode<double> *origin  = new StochasticNode<double>( "origin", new NormalDistribution(meanOT, stdOT) );
    StochasticNode<double> *div   = new StochasticNode<double>("diversification", new ExponentialDistribution(dLambda));
    StochasticNode<double> *turn  = new StochasticNode<double>("turnover", new BetaDistribution(turnA, turnB));
	
	//   Deterministic nodes
	//    birthRate = div / (1 - turn)
	DeterministicNode<double> *birthRate = new DeterministicNode<double>("birth_rate", new BirthRateConstBDStatistic(div, turn));
	//    deathRate = (div * turn) / ( 1 - turn)
	DeterministicNode<double> *deathRate = new DeterministicNode<double>("death_rate", new DeathRateConstBDStatistic(div, turn));
	// For some datasets with large root ages, if div>1.0 (or so), the probability is NaN,
	// so the starting value of div is replaced by a uniform01() draw divided by 1.5
	RandomNumberGenerator* rng = GLOBAL_RNG;
	div->setValue(rng->uniform01() / 1.5);
	
	// Birth-death tree: one Taxon per taxon name of the first matrix
    std::vector<std::string> names = data[0]->getTaxonNames();
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, birthRate, deathRate, rho, "uniform", "nTaxa", taxa, std::vector<Clade>()) );

    // tree height statistic of tau; used for the rate scaling below and monitored later
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
	
	
	// ##############################################
	// #### ACLN Model on Branch Rates #####
	// ##############################################
	
	// 2 * (number of taxa) - 2 branches, and one more node than branches
	size_t numBranches = 2 * data[0]->getNumberOfTaxa() - 2;
	size_t numNodes = numBranches + 1; // model rates at nodes
	
	// hyper-parameters of the gamma priors on the root rate (a, b) and on BM_var (a_nu, b_nu)
    ConstantNode<double> *a      = new ConstantNode<double>("a", new double(4.0) );
    ConstantNode<double> *b      = new ConstantNode<double>("b", new double(4.0) );
    ConstantNode<double> *anu    = new ConstantNode<double>("a_nu", new double(1.0) );
    ConstantNode<double> *bnu    = new ConstantNode<double>("b_nu", new double(8.0) );
	
	StochasticNode<double> *rootRate = new StochasticNode<double>("root.rate", new GammaDistribution(a, b));
	StochasticNode<double> *bmNu = new StochasticNode<double>("BM_var", new GammaDistribution(anu, bnu));
	
	// NOTE(review): rootID is not used anywhere else in this function
	size_t rootID = trees[0]->getRoot().getIndex();

	// scaleRate is a BinaryDivision node over (crInv, treeHeight) -- presumably 1.0 / tree height, confirm operand order
	ConstantNode<double> *crInv  = new ConstantNode<double>("invCr", new double(1.0) );
	DeterministicNode<double> *scaleRate = new DeterministicNode<double>("scaleRate", new BinaryDivision<double, double, double>(crInv, treeHeight));

	// the vector of node rates drawn from the ACLN distribution on tau
	StochasticNode< std::vector< double > > *nodeRates = new StochasticNode< std::vector< double > >( "NodeRates", new AutocorrelatedLognormalRateDistribution(tau, bmNu, rootRate, scaleRate) );
	
	std::cout << nodeRates->getValue().size() << std::endl;
	

	// one deterministic rate node per branch index, each computed by RateOnBranchAve from the node rates
	std::vector<const TypedDagNode<double> *> branchRates;
	for( size_t i=0; i<numBranches; i++){
		std::ostringstream brName;
        brName << "br(" << i << ")";
		DeterministicNode<double> *tmpBrRt = new DeterministicNode<double>(brName.str(), new RateOnBranchAve(nodeRates, tau, scaleRate, i));
		branchRates.push_back( tmpBrRt );
	}
	// build a single vector-valued node out of the per-branch nodes
    DeterministicNode< std::vector< double > >* brVector = new DeterministicNode< std::vector< double > >( "branchRates", new VectorFunction< double >( branchRates ) );
	
	// making a combined DagNode for a compound move (passed to RateAgeACLNMixingMove below)
	std::vector<DagNode*> treeAndRates;
	treeAndRates.push_back( tau );
	treeAndRates.push_back(nodeRates);
	treeAndRates.push_back(rootRate);

	
	// ####################################
	
	
    // ###### GTR model priors ######
	//    Constant nodes: flat Dirichlet parameters for 4 base frequencies and 6 exchangeabilities
    ConstantNode<std::vector<double> > *bf   = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e    = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    //    Stochastic nodes
    StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) );
    StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) );
	
    DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
    std::cout << "Q:\t" << q->getValue() << std::endl;

	// ####### Gamma Rate Het. ######
	
	// four site rates: the 0.125, 0.375, 0.625 and 0.875 quantiles of a Gamma(srAlpha, srAlpha) distribution
	ConstantNode<double> *shapePr = new ConstantNode<double>("gammaShPr", new double(0.5));
	StochasticNode<double> *srAlpha = new StochasticNode<double>("siteRates.alpha", new ExponentialDistribution(shapePr));
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1Value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2Value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3Value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(srAlpha, srAlpha) ) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4Value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(srAlpha, srAlpha) ) );
    std::vector<const TypedDagNode<double>* > gammaRates = std::vector<const TypedDagNode<double>* >();
    gammaRates.push_back(q1Value);
    gammaRates.push_back(q2Value);
    gammaRates.push_back(q3Value);
    gammaRates.push_back(q4Value);
    // collect the quantiles into one vector node and pass it through NormalizeVectorFunction
    DeterministicNode<std::vector<double> > *siteRates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gammaRates) );
    DeterministicNode<std::vector<double> > *siteRatesNormed = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(siteRates) );
    
	
	// use the first input tree as the value of tau
	tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
	std::cout << " ** origin   " << origin->getValue() << std::endl;
	std::cout << " ** root age " << trees[0]->getRoot().getAge() << std::endl;
	
    // the character model: per-branch clock rates, the GTR matrix and the normalized site rates,
    // clamped to the first data matrix
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
	phyloCTMC->setClockRate( brVector ); 
    phyloCTMC->setRateMatrix( q );
	phyloCTMC->setSiteRates( siteRatesNormed );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
	charactermodel->clamp( data[0] );
	
	std::cout << " diversification: " << div->getValue() << std::endl;
	std::cout << " turnover: " << turn->getValue() << std::endl;
	std::cout << " birth rate: " << birthRate->getValue() << std::endl;
	std::cout << " death rate: " << deathRate->getValue() << std::endl;
	
	/* add the moves */
    // topology moves are commented out, so only node/root/origin times, substitution parameters and rates are updated
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 1.0, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(turn, 1.0), 1.0, true ) );
	//	moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
	//	moves.push_back( new NarrowExchange( tau, 10.0 ) );
	//	moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
	//	moves.push_back( new SubtreeScale( tau, 5.0 ) );
	//	moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
	moves.push_back( new RootTimeSlide( tau, 50.0, true, 10.0 ) );
	moves.push_back( new OriginTimeSlide( origin, tau, 50.0, true, 10.0 ) );
	moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
	moves.push_back( new SimplexMove( er, 450.0, 6, 0, true, 2.0, 0.5 ) );
	moves.push_back( new SimplexMove( pi, 250.0, 4, 0, true, 2.0, 0.5 ) ); 
	moves.push_back( new SimplexMove( er, 200.0, 1, 0, false, 0.5 ) );
	moves.push_back( new SimplexMove( pi, 100.0, 1, 0, false, 0.5 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(srAlpha, log(2.0)), 1, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(bmNu, 0.75), 4, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 0.5), 2, false ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(rootRate, 1.0), 2, false ) );
	moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 1.0, false, 8.0 * (double)numNodes) );
	moves.push_back( new ScaleSingleACLNRatesMove( nodeRates, 2.0, false, 8.0 * (double)numNodes) );
	moves.push_back( new RateAgeACLNMixingMove( treeAndRates, 0.02, false, 2.0 ) ); 
	
    // add some tree stats to monitor
	DeterministicNode<double> *meanNdRate = new DeterministicNode<double>("MeanNodeRate", new MeanVecContinuousValStatistic(nodeRates) );
	
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::vector<DagNode*> monitoredNodes;
	monitoredNodes.push_back( meanNdRate );
	monitoredNodes.push_back( treeHeight );
	monitoredNodes.push_back( origin );
	monitoredNodes.push_back( nodeRates );
	monitoredNodes.push_back( rootRate );
	monitoredNodes.push_back( bmNu );
	monitoredNodes.push_back( scaleRate );
	// the screen monitor is created from the list as it stands here
	monitors.push_back( new ScreenMonitor( monitoredNodes, 10, "\t" ) );
	
	// the list is extended afterwards; the file monitor below is created from the extended list
	monitoredNodes.push_back( div );
	monitoredNodes.push_back( turn );
	monitoredNodes.push_back( birthRate );
	monitoredNodes.push_back( deathRate );
	monitoredNodes.push_back( pi );
    monitoredNodes.push_back( er );
    monitoredNodes.push_back( srAlpha );
	monitoredNodes.push_back( brVector );
	
	std::string logFN = "clock_test/test_rb_ACLN_6June_rn_3.log";
	monitors.push_back( new FileMonitor( monitoredNodes, 10, logFN, "\t" ) );
	
    // NOTE(review): monitoredNodes2 is only referenced by the commented-out tree monitor below
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
	
//	std::string treFN = "clock_test/test_rb_ACLN_6June_pr.tre";
//	monitors.push_back( new FileMonitor( monitoredNodes2, 10, treFN, "\t", false, false, false ) );
    
    /* instantiate the model */
    // the model is created from the rate-matrix node q
    Model myModel = Model(q);
	
	// overrides whatever value the member mcmcGenerations held before
	mcmcGenerations = 200000;

    /* instantiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
	
	
	/* clean up */
	// only charactermodel is deleted explicitly; the other deletes are commented out
	//	delete div;
	//	delete turn;
	//	delete rho;
	//	delete cp;
	//	delete branchRates;
	//	delete q;
	//	delete tau;
	delete charactermodel;
	//	delete a;
	//	delete birthRate;
	//	delete phyloCTMC;
	//	delete dLambda;
	
	
	monitors.clear();
	moves.clear();
	
    return true;
}
/**
 * Run an MCMC test of the autocorrelated branch-heterogeneous GTR model.
 *
 * The global RNG is seeded with (25, 42). The function then reads the character matrices from
 * alignmentFilename and the time trees from treeFilename, builds a constant-rate birth-death
 * tree prior, a vector of per-branch rate matrices drawn from
 * AutocorrelatedBranchMatrixDistribution, clamps the first matrix to the character model and
 * runs the MCMC for mcmcGenerations generations.
 *
 * Only data[0] and trees[0] are used, and both are indexed without checking that the readers
 * returned at least one element.
 *
 * \return Always true.
 */
bool TestAutocorrelatedBranchHeterogeneousGtrModel::run( void ) {
    
    // fix the rng seed
    std::vector<unsigned int> seed;
    seed.push_back(25);
    seed.push_back(42);
    GLOBAL_RNG->setSeed(seed);
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    // NOTE(review): data[0] is a pointer, so this presumably streams the pointer rather than the matrix -- confirm intent
    std::cout << data[0] << std::endl;
    
    // the trees
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors: diversification ~ Uniform(0, 100), turnover fixed at 0.0, rho fixed at 1.0
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    // gtr model priors: flat Dirichlet parameters for 4 frequencies and 6 exchangeabilities
    ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    
    //Root frequencies
    StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bf) );
    
    
    // exchangeability rates
    StochasticNode<std::vector<double> > * er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) ;
    
    std::cout << "bf:\t" << bf->getValue() << std::endl;
    std::cout << "e:\t" << e->getValue() << std::endl;
    
    
    // the tree prior: origin fixed at twice the root age of the first input tree,
    // one Taxon per taxon name of the first matrix
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
    // use the first input tree as the value of tau
    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // NOTE(review): pis and qs are only referenced by commented-out code in this function
    std::vector<StochasticNode < std::vector<double> >* > pis;
    //  std::vector<StochasticNode < std::vector<double> >* > ers;
    std::vector< const TypedDagNode < RateMatrix>* > qs;
    
    // alpha ~ Gamma(5.0, 0.5)
    // NOTE(review): the node name below is spelled "alpha_prior_rtae" -- presumably a typo for "alpha_prior_rate"
    ConstantNode<double> *alpha_prior_shape = new ConstantNode< double >("alpha_prior_shape", new double( 5.0 ) );
    ConstantNode<double> *alpha_prior_rate = new ConstantNode< double >("alpha_prior_rtae", new double( 0.5 ) );
    StochasticNode<double> *alpha = new StochasticNode<double>("alpha", new GammaDistribution( alpha_prior_shape, alpha_prior_rate ) );
    
    // beta ~ Beta(2.0, 5.0)
    ConstantNode<double> *beta_prior_shape1 = new ConstantNode< double >("beta_prior_shape1", new double( 2.0 ) );
    ConstantNode<double> *beta_prior_shape2 = new ConstantNode< double >("beta_prior_shape2", new double( 5.0 ) );
    StochasticNode<double> *beta = new StochasticNode<double>("beta", new BetaDistribution( beta_prior_shape1, beta_prior_shape2 ) );
    
    // the vector of rate matrices, drawn from the autocorrelated distribution on tau
    StochasticNode< RbVector<RateMatrix> > *perBranchQ = new StochasticNode< RbVector< RateMatrix > >( "autocorrBranchRate", new AutocorrelatedBranchMatrixDistribution( tau, beta, rf, er, alpha ) );

    //    StochasticNode< std::vector<RateMatrix> > *perBranchQ = new StochasticNode< std::vector< RateMatrix > >( "autocorrBranchRate", new DPP< RateMatrix >( tau, ... ) );
    
//    
//    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
//        std::ostringstream pi_name;
//        pi_name << "pi(" << i << ")";
//        pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bf) ) );
//        //  ers.push_back(new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) ) );
//        std::ostringstream q_name;
//        q_name << "q(" << i << ")";
//        qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new GtrRateMatrixFunction(er, pis[i]) ));
//        std::cout << "Q:\t" << qs[i]->getValue() << std::endl;
//    }
    
    
    // and the character model, clamped to the first data matrix
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setRootFrequencies( rf );
    phyloCTMC->setRateMatrix( perBranchQ );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
    charactermodel->clamp( data[0] );
    
    
    /* add the moves */
    // moves on div, the tree, er and rf; no move is added for alpha, beta or perBranchQ here
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
    moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( rf, 10.0, 1, 0, true, 2.0 ) );
    moves.push_back( new SimplexMove( rf, 100.0, 4, 0, true, 2.0 ) );
    
//    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
//        //     moves.push_back( new SimplexMove( ers[i], 10.0, 1, true, 2.0 ) );
//        moves.push_back( new SimplexMove( pis[i], 10.0, 1, true, 2.0 ) );
//        //    moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) );
//        moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) );
//    }
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    // three node sets: div (log file), er/rf/treeHeight (log file and screen), tau (tree file)
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    //    monitoredNodes.insert( er );
    //    monitoredNodes.insert( pi );
    monitoredNodes.insert( div );
    monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.log", "\t" ) );
    std::set<DagNode*> monitoredNodes1;
    monitoredNodes1.insert( er );
    /*    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
     monitoredNodes1.insert( pis[i] );
     }*/
    monitoredNodes1.insert( rf );
    monitoredNodes1.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestAutocorrelatedBranchHeterogeneousGtrModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestAutocorrelatedBranchHeterogeneousGtrModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    // the model is created from the tree node tau
    Model myModel = Model( tau );
    
    // print the name of every DAG node of the model to stderr
    std::vector<DagNode*> &nodes = myModel.getDagNodes();
    for (std::vector<DagNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
        std::cerr << (*it)->getName() << std::endl;
    }
    
    /* instantiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    // only div is deleted explicitly; the other deletes are commented out
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished Autocorrelated Branch Heterogeneous GTR model test." << std::endl;
    
    return true;
}
Ejemplo n.º 14
0
/**
 * Perform the proposal.
 *
 * A Dirichlet-simplex proposal randomly changes some values of a simplex, although the other values
 * change too because of the renormalization.
 * First, some random indices are drawn. Then, the proposal draws a new somplex
 *   u ~ Dirichlet(val[index] * alpha)
 * where alpha is the tuning parameter.The new value is set to u.
 * The simplex is then renormalized.
 *
 * \return The hastings ratio.
 */
double DirichletSimplexProposal::doProposal( void )
{
    
    // Get random number generator
    RandomNumberGenerator* rng     = GLOBAL_RNG;
    
    // store the value
    storedValue = variable->getValue();
    
    const RbVector<double>& curVal = variable->getValue();
    RbVector<double> newVal = curVal;
    size_t              n      = curVal.size();
    
    /* We update the simplex values by proposing new values from a Dirichlet centered
     on the current values. The i-th parameter of the Dirichlet is the i-th value
     in the simplex multiplied by a parameter (alpha0, AKA tuning) that controls the
     variance of the Dirichlet. We implement two cases of this general move. In one
     case, all of the elements of the simplex are targeted for update (n == k). In the
     other, more complicated, case a subset of the elements of the simplex are updated
     (k < n). Here, we construct a smaller simplex with k+1 elements. The first k of the
     elements are the values from the full simplex that were targeted for update. The last
     element of the smaller simplex accumulates the probabilities of all of the simplex
     values in the full simplex that were not targeted for update. We then update the
     small simplex by centering a Dirichlet on the small simplex. The values for those elements
     in the full simplex that were not targeted for update are all changed proportionally.
     This means that we need to calculate the Jacobian for the Hastings ratio in this case. */
    double lnProposalRatio = 0.0;
    if ( nCategories > n )
    {
        // we can't update more elements than there are elements in the simplex
        throw RbException( "Attempting to update too many simplex variables" );
    }
    else if ( nCategories < 1 )
    {
        // we also can't update 0 or a negative number of elements
        throw RbException( "Attempting to update too few simplex variables" );
    }
    else if ( nCategories < n )
    {
        // we update a subset of the elements in the full simplex
        // pick k values at random, producing a map from the index in the full vector (curVal) to
        // the index in the reduced vector (x, alpha, and z)
        std::vector<size_t> indicesToUpdate;
        std::vector<size_t> tmpV;
        for (size_t i=0; i<n; i++)
        {
            tmpV.push_back(i);
        }
        
        RbStatistics::Helper::randomlySelectFromVectorWithoutReplacement<size_t>(tmpV, indicesToUpdate, nCategories, *rng);
        std::map<size_t,size_t> mapper;
        for (size_t i=0; i<indicesToUpdate.size(); i++)
        {
            mapper.insert( std::make_pair(indicesToUpdate[i], i) );
        }
        
        // set up the vectors
        std::vector<double> x(indicesToUpdate.size()+1, 0.0);
        std::vector<double> kappaV(indicesToUpdate.size()+1, 0.0);
        std::vector<double> alphaForward(indicesToUpdate.size()+1, 0.0);
        std::vector<double> alphaReverse(indicesToUpdate.size()+1, 0.0);
        std::vector<double> z(indicesToUpdate.size()+1, 0.0);
        for (size_t i=0; i<n; i++)
        {
            std::map<size_t,size_t>::iterator it = mapper.find(i);
            if (it != mapper.end())
            {
                x[it->second] += curVal[it->first];
                kappaV[it->second] += kappa;
            }
            else
            {
                x[x.size()-1] += curVal[i];
                kappaV[kappaV.size()-1] += kappa;
            }
        }
        
        for (size_t i=0; i<x.size(); i++)
        {
            alphaForward[i] = (x[i]+offset) * alpha + kappaV[i];
        }
        
        // draw a new value for the reduced vector
        z = RbStatistics::Dirichlet::rv( alphaForward, *rng );
        
        // fill in the Dirichlet parameters for the reverse probability calculations
        for (size_t i=0; i<z.size(); i++)
        {
            alphaReverse[i] = (z[i]+offset) * alpha + kappaV[i];
        }
        
        // fill in the full vector
        double factor = z[z.size()-1] / x[x.size()-1];
        for (size_t i=0; i<n; i++)
        {
            std::map<size_t,size_t>::iterator it = mapper.find(i);
            if (it != mapper.end())
            {
                newVal[i] = z[it->second];
            }
            else
            {
                newVal[i] = curVal[i] * factor;
            }
            
            // test for 0-values
            if ( newVal[i] < 1E-100)
            {
                return RbConstants::Double::neginf;
            }
        }
        
        try
        {
            // Hastings ratio
            lnProposalRatio  = RbStatistics::Dirichlet::lnPdf(alphaReverse, x) - RbStatistics::Dirichlet::lnPdf(alphaForward, z); // Hastings Ratio
            lnProposalRatio += (n - nCategories) * log(factor); // Jacobian
        }
        catch (RbException e)
        {
            lnProposalRatio = RbConstants::Double::neginf;
        }
        
        
    }
    else
    {
        // we update all of the elements in the vector
        // first, we get the parameters of the Dirichlet for the forward move
        std::vector<double> alphaForward(curVal.size());
        for (size_t i=0; i<curVal.size(); i++)
        {
            alphaForward[i] = (curVal[i]+offset) * alpha + kappa;
            // we need to check for 0 values
            if (alphaForward[i] < 1E-100)
            {
                // very low proposal probability which will hopefully result into a rejected proposal
                return RbConstants::Double::neginf;
            }
        }
        
        // then, we propose new values
        newVal = RbStatistics::Dirichlet::rv( alphaForward, *rng );
        
        // and calculate the Dirichlet parameters for the (imagined) reverse move
        std::vector<double> alphaReverse(newVal.size());
        for (size_t i=0; i<curVal.size(); i++)
        {
            alphaReverse[i] = (newVal[i]+offset) * alpha + kappa;
            // we need to check for 0 values
            if (alphaReverse[i] < 1E-100)
            {
                // very low proposal probability which will hopefully result into a rejected proposal
                return RbConstants::Double::neginf;
            }
        }
        
                
        try
        {
            // finally, we calculate the log of the Hastings ratio
            lnProposalRatio = RbStatistics::Dirichlet::lnPdf(alphaReverse, curVal) - RbStatistics::Dirichlet::lnPdf(alphaForward, newVal);
        }
        catch (RbException e)
        {
            lnProposalRatio = RbConstants::Double::neginf;
        }
    }
    
    variable->setValue( new RbVector<double>(newVal), false );
    
    return lnProposalRatio;
}
/**
 * Run an MCMC test of an uncorrelated lognormal (UCLN) relaxed clock combined with
 * branch-heterogeneous T92 rate matrices.
 *
 * The global RNG is seeded with (7, 4). The function then reads the character matrices from
 * alignmentFilename and the time trees from treeFilename, builds one T92 rate matrix and one
 * lognormally distributed clock rate per branch, a constant-rate birth-death tree prior, clamps
 * the first matrix to the character model and runs the MCMC for mcmcGenerations generations.
 *
 * Only data[0] and trees[0] are used, and both are indexed without checking that the readers
 * returned at least one element. trees[0] is only used for its root age; the line that would set
 * it as the starting tree is commented out.
 *
 * \return Always true.
 */
bool TestUCLNRelaxedClockBHT92Model::run( void ) {
    
    // fix the rng seed
    std::vector<unsigned int> seeds;
    seeds.push_back(7);
    seeds.push_back(4);
    GLOBAL_RNG->setSeed( seeds );
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    // NOTE(review): data[0] is a pointer, so this presumably streams the pointer rather than the matrix -- confirm intent
    std::cout << data[0] << std::endl;
    
    // the trees
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors: diversification ~ Uniform(0, 100), turnover fixed at 0.0, rho fixed at 1.0
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    
	
    // Setting up the substitution model //
	
    //ts/tv ratio:
    ConstantNode<double > *tstv_prior = new ConstantNode<double >( "tstv_prior", new double(0.25) );
    ContinuousStochasticNode *tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution(tstv_prior) );
	
    //GC content prior: used for both parameters of every Beta distribution below
    ConstantNode<double > *eq_gc_prior = new ConstantNode<double >( "eq_gc_prior_ab", new double(1.0) );    
	
    //Root GC frequency
    
    StochasticNode< double  > *omega = new StochasticNode< double >( "omega", new BetaDistribution(eq_gc_prior,eq_gc_prior) );
    DeterministicNode<std::vector<double> > *rf = new DeterministicNode< std::vector<double> >( "rf", new NucleotideFrequenciesFromGcContentFunction( omega ) );
	
    std::cout << "omega:\t" << omega->getValue() << std::endl;
    std::cout << "rf:\t" << rf->getValue() << std::endl;
    std::cout << "tstv:\t" << tstv->getValue() << std::endl;
    
    //Declaring a vector of matrices, one per branch (2 * number of taxa - 2 branches)
    size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2;
    std::vector<ContinuousStochasticNode*> thetas;
    std::vector< const TypedDagNode < RateMatrix >* > qs;
	
	//Equilibrium GC frequency: one per branch, defined in the loop along with the T92 rate matrices.
    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
        std::ostringstream eq_gc_name;
        eq_gc_name << "eq_gc(" << i << ")";
        thetas.push_back(new ContinuousStochasticNode( eq_gc_name.str(), new BetaDistribution(eq_gc_prior,eq_gc_prior) ) );
		std::ostringstream q_name;
        q_name << "q(" << i << ")";
		qs.push_back(new DeterministicNode< RateMatrix >( q_name.str(), new Tamura92RateMatrixFunction( thetas[i], tstv) ));
        //std::cout << "Matrix Q:\t"<<i<<"\t" << qs[i]->getValue() << std::endl;
    }
    
	//Build a node out of the vector of nodes
    DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) );
    
	
	// Setting up the relaxed clock model //

    // the first two parameters of the lognormal distribution on every branch rate
    ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) );
    ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) );

	
	// one independent lognormal rate per branch, each with its own constant "offset" node of 0.0
	std::vector<const TypedDagNode<double> *> branchRates;
	// NOTE(review): branchRates_nonConst is filled but not read again in this function
	std::vector< ContinuousStochasticNode *> branchRates_nonConst;
	for( size_t i=0; i<numBranches; i++){
        std::ostringstream br_name;
        br_name << "br(" << i << ")";
		ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(a, b, new ConstantNode<double>("offset", new double(0.0) )));
		branchRates.push_back( tmp_branch_rate );
		branchRates_nonConst.push_back( tmp_branch_rate );
	}
	//Build a node out of the vector of nodes
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );

	
	
	// Putting it all together //

	
    // the tree prior: origin fixed at twice the root age of the first input tree
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, div, turn, rho, "uniform", "survival", int(names.size()), names, std::vector<Clade>()) );
    
	//If we want to get a good starting tree
	//    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
	//    StochasticNode<CharacterData<DnaState> > *charactermodel = new StochasticNode<CharacterData <DnaState> >("S", new SimpleGTRBranchRateTimeCharEvoModel<DnaState, TimeTree>(tau, q, br_vector, true, data[0]->getNumberOfCharacters()) );
    
    // root frequencies, per-branch rate matrices and per-branch clock rates, clamped to the first data matrix
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setRootFrequencies( rf );
    phyloCTMC->setRateMatrix( qs_node );
    phyloCTMC->setClockRate( br_vector );
	StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
	charactermodel->clamp( data[0] );
    	
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
    moves.push_back( new BetaSimplexMove( omega, 10.0, true, 2.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) );
	
    // two moves per branch-specific GC frequency: a beta-simplex move and a sliding move
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        // NOTE(review): the result of the dynamic_cast is passed on without a null check
        moves.push_back( new BetaSimplexMove( dynamic_cast<StochasticNode<double>* >(thetas[i]), 10.0, true, 2.0 ) );
        moves.push_back( new SlidingMove( thetas[i], 0.05, true, 2.0) );
        //    moves.push_back( new SimplexMove( ers[i], 100.0, 6, true, 2.0 ) );
		//        moves.push_back( new SimplexMove( pis[i], 100.0, 4, true, 2.0 ) );
    }
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    // three node sets: div (log file), thetas/rf/treeHeight (log file and screen), tau (tree file)
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    //    monitoredNodes.insert( er );
    //    monitoredNodes.insert( pi );
    monitoredNodes.insert( div );
    monitors.push_back( new FileMonitor( monitoredNodes, 10, "TestUCLNRelaxedClockBHT92Model.log", "\t" ) );
    std::set<DagNode*> monitoredNodes1;
	//    monitoredNodes1.insert( er );
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        monitoredNodes1.insert( thetas[i] );
    }
    monitoredNodes1.insert( rf );
    monitoredNodes1.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes1, 10, "TestUCLNRelaxedClockBHT92ModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes1, 10, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestUCLNRelaxedClockBHT92Model.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    // the model is created from the first per-branch rate-matrix node
    Model myModel = Model(qs[0]);
    
    /* instantiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    // only div is deleted explicitly; the other deletes are commented out
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    // name the test that actually ran (the message used to be a leftover from the GTR test)
    std::cout << "Finished UCLN relaxed clock BH T92 model test." << std::endl;
    
    return true;
}
bool TestBranchHeterogeneousHkyModel::run( void ) {
    
    /* First, we read in the data */
    // the matrix
    std::vector<AbstractCharacterData*> data = NclReader::getInstance().readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    std::cout << data[0] << std::endl;
        
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("div_lower", new double(0.0)), new ConstantNode<double>("div_upper", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    
    // hky model priors
    ConstantNode<std::vector<double> > *bfPrior = new ConstantNode<std::vector<double> >( "bfPrior", new std::vector<double>(4,1.0) );
    ConstantNode< double > *tstvPrior = new ConstantNode< double >( "tstvPrior", new double(1.0) );
	
    // root frequencies
    StochasticNode<std::vector<double> > *rf = new StochasticNode<std::vector<double> >( "rf", new DirichletDistribution(bfPrior) );
	
	
	
//    // first the hyper-priors of the clock model
    ConstantNode<double> *a = new ConstantNode<double>("a", new double(0.5) );
    ConstantNode<double> *b = new ConstantNode<double>("b", new double(0.25) );
//	
//    
    // then the parameters
    ContinuousStochasticNode *expectLN = new ContinuousStochasticNode( "UCLN.expectation", new ExponentialDistribution(a) ); // the expectation of the LN dist so mu = log(expectLN) - (sigLN^2)/2
    ContinuousStochasticNode *sigLN = new ContinuousStochasticNode("UCLN.variance", new ExponentialDistribution(b) );	
    DeterministicNode<double> *logExpLN = new DeterministicNode<double>("logUCLN.exp", new LnFunction(expectLN) );
   DeterministicNode<double> *squareSigLN = new DeterministicNode<double>("squareSigLN", new BinaryMultiplication<double, double, double>(sigLN, sigLN) );
   DeterministicNode<double> *divSqSigLN = new DeterministicNode<double>("divSqSigLN", new BinaryDivision<double, double, double>(squareSigLN, new ConstantNode<double>( "2", new double (2.0))) );
   DeterministicNode<double> *muValLN = new DeterministicNode<double>("MuValLN", new BinarySubtraction<double, double, double>(logExpLN, divSqSigLN) );
	
   
    //Declaring a vector of HKY matrices
	size_t numBranches = 2*data[0]->getNumberOfTaxa() - 2;
    std::vector<StochasticNode < std::vector<double> >* > pis;
    std::vector< const TypedDagNode< RateMatrix >* > qs;
	StochasticNode < double >* tstv = new ContinuousStochasticNode("tstv", new ExponentialDistribution( tstvPrior ) );
//
//	
    // declaring a vector of clock rates
	std::vector<const TypedDagNode<double> *> branchRates;
	std::vector< ContinuousStochasticNode *> branchRates_nonConst;
    	
	
	
	
    for (unsigned int i = 0 ; i < numBranches ; i++ ) {
        // construct the per branch rate matrix
        std::ostringstream pi_name;
        pi_name << "pi(" << i << ")";
        pis.push_back(new StochasticNode<std::vector<double> >( pi_name.str(), new DirichletDistribution(bfPrior) ) );
        std::ostringstream q_name;
        q_name << "q(" << i << ")";
        qs.push_back(new DeterministicNode<RateMatrix>( q_name.str(), new HkyRateMatrixFunction( tstv, pis[i]) ));
        std::cout << "Q:\t" << qs[i]->getValue() << std::endl;        
        
       // construct the per branch clock rate
       std::ostringstream br_name;
        br_name << "br(" << i << ")";
		ContinuousStochasticNode* tmp_branch_rate = new ContinuousStochasticNode( br_name.str(), new LognormalDistribution(muValLN, sigLN, new ConstantNode<double>("offset", new double(0.0) )));
		branchRates.push_back( tmp_branch_rate );
		branchRates_nonConst.push_back( tmp_branch_rate );
	}
    // build the vector containing all rates/rate-matrices
    // instead of independent rates/rate-matrices we could have used anything that specifies a distribution on a set of values
    // e.g. a mixture, DPP or an autocorrelated model
    DeterministicNode< std::vector< double > >* br_vector = new DeterministicNode< std::vector< double > >( "br_vector", new VectorFunction< double >( branchRates ) );
    DeterministicNode< RbVector< RateMatrix > >* qs_node = new DeterministicNode< RbVector< RateMatrix > >( "q_vector", new RbVectorFunction<RateMatrix>(qs) );
    
    
    // create the variables for the rate variation across sites
    // we use the standard 4 categorical gamma rate variation
    // though, any other rates could be used too as long as they are normalized
    ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) );
    ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) );
    
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) );
    std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >();
    gamma_rates.push_back(q1_value);
    gamma_rates.push_back(q2_value);
    gamma_rates.push_back(q3_value);
    gamma_rates.push_back(q4_value);
    
    DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) );
    DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) );
    // we actually do not use different probabilities per rate (yet!)
   // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) );
    
    
	
    // create the stochastic node for the tree
    // we use a birth-death process prior and thus a time-tree
    // we could use as well an unrooted tree
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( 2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
	
//	//rescale the tree so that its root age is 1
	TimeTree *t = tau->getValue().clone();
	const TopologyNode &root = t->getRoot();
	TreeUtilities::rescaleTree(t, &t->getRoot(), 1.0 / root.getAge());
//	
    tau->setValue( t );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
    //GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, true, data[0]->getNumberOfCharacters() );
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *charModel = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters() );
    // set the branch heterogeneous substitution matrices
    // if you set instead of a vector a single matrix, then you get a homogeneous model
    charModel->setRateMatrix( qs_node );
    charModel->setRootFrequencies( rf );
    // set the per branch clock rates
    // if you instead specify a single rate, you get a strict clock model
    charModel->setClockRate( br_vector );
    // specify the rate variation across sites
    // if you skip this then you get the model without rate variation across sites.
    charModel->setSiteRates( site_rates_norm );
	
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", charModel );
    charactermodel->clamp( data[0] );
    
    
    /* add the moves */
    RbVector<Move> moves;
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(div, 1.0), 2, true ) );
    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
    moves.push_back( new NarrowExchange( tau, 10.0 ) );
    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//Fixintg the root age at 1:
	//  moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
	//    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
//test: only 20 instead of 30
    moves.push_back( new NodeTimeSlideUniform( tau, 20.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(tstv, 1.0), 2, true ) );
    moves.push_back( new SimplexSingleElementScale( rf, 10.0, true, 2.0 ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 2, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(expectLN, 1.0), 2, true ) );
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(sigLN, 1.0), 2, true ) );
	std::vector<StochasticNode<double> * > rates;
	for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
		rates.push_back( branchRates_nonConst[i] );
	}
	moves.push_back( new RateAgeBetaShift( tau, rates, 1.0, true, 10.0) );                                                         //!<  constructor

	
	
    for (unsigned int i = 0 ; i < numBranches ; i ++ ) {
        moves.push_back( new SimplexSingleElementScale( pis[i], 10.0, true, 2.0 ) );
        moves.push_back( new MetropolisHastingsMove( new ScaleProposal(branchRates_nonConst[i], 1.0), 1, true ) );
    }
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
    monitoredNodes.insert( tstv );
    monitoredNodes.insert( treeHeight );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 10, "TestBranchHeterogeneousHkyModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(qs[0]);
	
    monitors.push_back( new ModelMonitor( 10, "TestBranchHeterogeneousHkyModel.log", "\t" ) );
	
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
//	myMcmc.burnin(1000, 100);
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished GTR model test." << std::endl;
    
    return true;
}
Ejemplo n.º 17
0
/**
 * Replaces this object's moves and monitors with copies of the given ones,
 * re-attached to the DAG nodes of the current model.
 *
 * Each move/monitor is cloned, and every DAG node the clone refers to is
 * swapped for the node of the same name returned by model->getDagNodes().
 * The re-attached clone is then stored in `moves` / `monitors`.
 *
 * @param mvs   moves to copy and reconnect
 * @param mons  monitors to copy and reconnect
 *
 * @throws RbException if a referenced node has an empty name, or if no node
 *         with the same name exists in the model. The temporary clone is
 *         released before throwing so the error path does not leak it.
 */
void HillClimber::replaceDag(const RbVector<Move> &mvs, const RbVector<Monitor> &mons)
{
    
    moves.clear();
    monitors.clear();
    
    // we need to replace the DAG nodes of the monitors and moves
    const std::vector<DagNode*>& modelNodes = model->getDagNodes();
    for (RbConstIterator<Move> it = mvs.begin(); it != mvs.end(); ++it)
    {
        
        Move *theMove = it->clone();
        // iterate over our own copy of the node list
        // (presumably swapNode alters the move's internal list -- verify)
        std::vector<DagNode*> nodes = theMove->getDagNodes();
        for (std::vector<DagNode*>::const_iterator j = nodes.begin(); j != nodes.end(); ++j)
        {
            
            RevBayesCore::DagNode *theNode = *j;
            
            // error checking
            if ( theNode->getName() == "" )
            {
                std::cerr << "The move has the following nodes:\n";
                for (std::vector<DagNode*>::const_iterator k = nodes.begin(); k != nodes.end(); ++k)
                {
                    std::cerr << (*k)->getName() << std::endl;
                }
                std::cerr << "The model has the following nodes:\n";
                for (std::vector<DagNode*>::const_iterator k = modelNodes.begin(); k != modelNodes.end(); ++k)
                {
                    std::cerr << (*k)->getName() << std::endl;
                }
                
                // grab the name first, then free the clone so it is not leaked by the throw
                const std::string moveName = theMove->getMoveName();
                delete theMove;
                throw RbException( "Unable to connect move '" + moveName + "' to DAG copy because variable name was lost");
            }
            
            DagNode* theNewNode = NULL;
            for (std::vector<DagNode*>::const_iterator k = modelNodes.begin(); k != modelNodes.end(); ++k)
            {
                if ( (*k)->getName() == theNode->getName() )
                {
                    theNewNode = *k;
                    break;
                }
            }
            // error checking
            if ( theNewNode == NULL )
            {
                // copy the name before releasing the clone, then throw
                const std::string nodeName = theNode->getName();
                delete theMove;
                throw RbException("Cannot find node with name '" + nodeName + "' in the model but received a move working on it.");
            }
            
            // now swap the node
            theMove->swapNode( *j, theNewNode );
        }
        // the container receives the object by reference; the local clone is freed right after
        moves.push_back( *theMove );
        delete theMove;
    }
    
    for (RbConstIterator<Monitor> it = mons.begin(); it != mons.end(); ++it)
    {
        Monitor *theMonitor = it->clone();
        std::vector<DagNode*> nodes = theMonitor->getDagNodes();
        for (std::vector<DagNode*>::const_iterator j = nodes.begin(); j != nodes.end(); ++j)
        {
            
            RevBayesCore::DagNode *theNode = (*j);
            
            // error checking
            if ( theNode->getName() == "" )
            {
                // free the clone so it is not leaked by the throw
                delete theMonitor;
                throw RbException( "Unable to connect monitor to DAG copy because variable name was lost");
            }
            
            DagNode* theNewNode = NULL;
            for (std::vector<DagNode*>::const_iterator k = modelNodes.begin(); k != modelNodes.end(); ++k)
            {
                if ( (*k)->getName() == theNode->getName() )
                {
                    theNewNode = *k;
                    break;
                }
            }
            // error checking
            if ( theNewNode == NULL )
            {
                // copy the name before releasing the clone, then throw
                const std::string nodeName = theNode->getName();
                delete theMonitor;
                throw RbException("Cannot find node with name '" + nodeName + "' in the model but received a monitor working on it.");
            }
            
            // now swap the node
            theMonitor->swapNode( *j, theNewNode );
        }
        monitors.push_back( *theMonitor );
        delete theMonitor;
        
    }
    
}
Ejemplo n.º 18
0
/**
 * Runs the replicate samplers through initializeSampler(true) and
 * nextCycle(true) until the stopping rules end the run.
 *
 * After announcing the run and initializing every replicate, the loop
 * advances all replicates one cycle per generation and then consults the
 * stopping rules. The run ends as soon as a non-convergence rule fires, or
 * when at least one convergence rule exists and all of them are satisfied.
 *
 * @param kIterations  forwarded to startMonitors() when starting from generation 0
 * @param rules        stopping rules checked after every generation
 */
void MonteCarloAnalysis::runPriorSampler( size_t kIterations , RbVector<StoppingRule> rules )
{
    
    // Announce what is about to happen
    std::stringstream message;
    if ( runs[0]->getCurrentGeneration() != 0 )
    {
        message << "Appending to previous MCMC simulation of " << runs[0]->getCurrentGeneration() << " iterations\n";
    }
    else
    {
        message << "\n";
        message << "Running prior MCMC simulation\n";
    }
    message << "This simulation runs " << replicates << " independent replicate";
    if ( replicates > 1 )
    {
        message << "s";
    }
    else
    {
        message << "";
    }
    message << ".\n";
    message << runs[0]->getStrategyDescription();
    RBOUT( message.str() );
    
    // Prepare every replicate for sampling
    for (size_t r = 0; r < replicates; ++r)
    {
        runs[r]->initializeSampler(true);
    }
    
    // On a fresh start, open the monitors and record generation 0
    if ( runs[0]->getCurrentGeneration() == 0 )
    {
        for (size_t r = 0; r < replicates; ++r)
        {
            runs[r]->startMonitors( kIterations );
            runs[r]->monitor(0);
        }
    }
    
    // reset the counters for the move schedules
    for (size_t r = 0; r < replicates; ++r)
    {
        runs[r]->reset();
    }
    
    // tell every stopping rule that a run begins
    for (size_t s = 0; s < rules.size(); ++s)
    {
        rules[s].runStarted();
    }
    
    
    // Main sampling loop
    size_t generation = runs[0]->getCurrentGeneration();
    bool finished  = false;
    bool converged = false;
    for (;;)
    {
        ++generation;
        
        // advance and monitor each replicate
        for (size_t r = 0; r < replicates; ++r)
        {
            runs[r]->nextCycle(true);
            runs[r]->monitor(generation);
        }
        
        // evaluate the stopping rules
        converged = true;
        size_t convergenceRuleCount = 0;
        for (size_t s = 0; s < rules.size(); ++s)
        {
            if ( !rules[s].isConvergenceRule() )
            {
                // any ordinary rule that fires ends the run immediately
                if ( rules[s].checkAtIteration(generation) && rules[s].stop( generation ) )
                {
                    finished = true;
                    break;
                }
                continue;
            }
            
            // convergence rules are always queried, even if an earlier one already failed
            const bool satisfied = rules[s].checkAtIteration(generation) && rules[s].stop( generation );
            converged = converged && satisfied;
            ++convergenceRuleCount;
        }
        // without any convergence rule we can never claim convergence
        converged = converged && ( convergenceRuleCount > 0 );
        
        if ( finished || converged )
        {
            break;
        }
    }
    
}
Ejemplo n.º 19
0
bool TestGtrGammaModel::run( void ) {
    
    /* First, we read in the data */
    // the matrix
    NclReader& reader = NclReader::getInstance();
    std::vector<AbstractCharacterData*> data = reader.readMatrices(alignmentFilename);
    std::cout << "Read " << data.size() << " matrices." << std::endl;
    
    std::vector<TimeTree*> trees = NclReader::getInstance().readTimeTrees( treeFilename );
    std::cout << "Read " << trees.size() << " trees." << std::endl;
    std::cout << trees[0]->getNewickRepresentation() << std::endl;
    
    /* set up the model graph */
    
    //////////////////////
    // first the priors //
    //////////////////////
    
    // birth-death process priors
    StochasticNode<double> *div = new StochasticNode<double>("diversification", new UniformDistribution(new ConstantNode<double>("", new double(0.0)), new ConstantNode<double>("", new double(100.0)) ));
    ConstantNode<double> *turn = new ConstantNode<double>("turnover", new double(0.0));
    ConstantNode<double> *rho = new ConstantNode<double>("rho", new double(1.0));
    // gtr model priors
    ConstantNode<std::vector<double> > *bf = new ConstantNode<std::vector<double> >( "bf", new std::vector<double>(4,1.0) );
    ConstantNode<std::vector<double> > *e = new ConstantNode<std::vector<double> >( "e", new std::vector<double>(6,1.0) );
    
    std::cout << "bf:\t" << bf->getValue() << std::endl;
    std::cout << "e:\t" << e->getValue() << std::endl;
    
    // then the parameters
    StochasticNode<std::vector<double> > *pi = new StochasticNode<std::vector<double> >( "pi", new DirichletDistribution(bf) );
    StochasticNode<std::vector<double> > *er = new StochasticNode<std::vector<double> >( "er", new DirichletDistribution(e) );
    
    //Rate heterogeneity
    ConstantNode<double> *alpha_prior = new ConstantNode<double>("alpha_prior", new double(0.5) );
    ContinuousStochasticNode *alpha = new ContinuousStochasticNode("alpha", new ExponentialDistribution(alpha_prior) );
    
    alpha->setValue( new double(0.5) );
    std::cout << "alpha:\t" << alpha->getValue() << std::endl;
    
    ConstantNode<double> *q1 = new ConstantNode<double>("q1", new double(0.125) );
    DeterministicNode<double> *q1_value = new DeterministicNode<double>("q1_value", new QuantileFunction(q1, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q1_value = new StochasticNode<double>("q1_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q2 = new ConstantNode<double>("q2", new double(0.375) );
    DeterministicNode<double> *q2_value = new DeterministicNode<double>("q2_value", new QuantileFunction(q2, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q2_value = new StochasticNode<double>("q2_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q3 = new ConstantNode<double>("q3", new double(0.625) );
    DeterministicNode<double> *q3_value = new DeterministicNode<double>("q3_value", new QuantileFunction(q3, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q3_value = new StochasticNode<double>("q3_value", new GammaDistribution(alpha, alpha) );
    ConstantNode<double> *q4 = new ConstantNode<double>("q4", new double(0.875) );
    DeterministicNode<double> *q4_value = new DeterministicNode<double>("q4_value", new QuantileFunction(q4, new GammaDistribution(alpha, alpha) ) );
//    StochasticNode<double> *q4_value = new StochasticNode<double>("q4_value", new GammaDistribution(alpha, alpha) );
    std::vector<const TypedDagNode<double>* > gamma_rates = std::vector<const TypedDagNode<double>* >();
    gamma_rates.push_back(q1_value);
    gamma_rates.push_back(q2_value);
    gamma_rates.push_back(q3_value);
    gamma_rates.push_back(q4_value);
    
    DeterministicNode<std::vector<double> > *site_rates = new DeterministicNode<std::vector<double> >( "site_rates", new VectorFunction<double>(gamma_rates) );
    // currently unused
    // ConstantNode<std::vector<double> > *site_rate_probs = new ConstantNode<std::vector<double> >( "site_rate_probs", new std::vector<double>(4,1.0/4.0) );

    DeterministicNode<std::vector<double> > *site_rates_norm = new DeterministicNode<std::vector<double> >( "site_rates_norm", new NormalizeVectorFunction(site_rates) );
    
    pi->setValue( new std::vector<double>(4,1.0/4.0) );
    er->setValue( new std::vector<double>(6,1.0/6.0) );
    
    std::cout << "pi:\t" << pi->getValue() << std::endl;
    std::cout << "er:\t" << er->getValue() << std::endl;
    std::cout << "rates:\t" << site_rates->getValue() << std::endl;
    std::cout << "rates:\t" << site_rates_norm->getValue() << std::endl;

    DeterministicNode<RateMatrix> *q = new DeterministicNode<RateMatrix>( "Q", new GtrRateMatrixFunction(er, pi) );
    
    std::cout << "Q:\t" << q->getValue() << std::endl;
    
    std::vector<std::string> names = data[0]->getTaxonNames();
    ConstantNode<double>* origin = new ConstantNode<double>( "origin", new double( trees[0]->getRoot().getAge()*2.0 ) );
    std::vector<RevBayesCore::Taxon> taxa;
    for (size_t i = 0; i < names.size(); ++i)
    {
        taxa.push_back( Taxon( names[i] ) );
    }
    StochasticNode<TimeTree> *tau = new StochasticNode<TimeTree>( "tau", new ConstantRateBirthDeathProcess(origin, NULL, div, turn, rho, "uniform", "survival", taxa, std::vector<Clade>()) );
    
    tau->setValue( trees[0] );
    std::cout << "tau:\t" << tau->getValue() << std::endl;
    
    // and the character model
// (unused)    size_t numChar = data[0]->getNumberOfCharacters();
    GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree> *phyloCTMC = new GeneralBranchHeterogeneousCharEvoModel<DnaState, TimeTree>(tau, 4, true, data[0]->getNumberOfCharacters());
    phyloCTMC->setSiteRates( site_rates_norm );
    phyloCTMC->setRateMatrix( q );
    StochasticNode< AbstractCharacterData > *charactermodel = new StochasticNode< AbstractCharacterData >("S", phyloCTMC );
    charactermodel->clamp( static_cast<DiscreteCharacterData<DnaState> *>( data[0] ) );
    
    std::cout << "LnL:\t\t" << charactermodel->getLnProbability() << std::endl;
    
    /* add the moves */
    RbVector<Move> moves;
//    moves.push_back( new ScaleMove(div, 1.0, true, 2.0) );
//    moves.push_back( new NearestNeighborInterchange( tau, 5.0 ) );
//    moves.push_back( new NarrowExchange( tau, 10.0 ) );
//    moves.push_back( new FixedNodeheightPruneRegraft( tau, 2.0 ) );
//    moves.push_back( new SubtreeScale( tau, 5.0 ) );
//    moves.push_back( new TreeScale( tau, 1.0, true, 2.0 ) );
//    moves.push_back( new NodeTimeSlideUniform( tau, 30.0 ) );
//    moves.push_back( new RootTimeSlide( tau, 1.0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( er, 10.0, 1, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( pi, 10.0, 1, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( er, 100.0, 6, 0, true, 2.0 ) );
//    moves.push_back( new SimplexMove( pi, 100.0, 4, 0, true, 2.0 ) );
    
    moves.push_back( new MetropolisHastingsMove( new ScaleProposal(alpha, 1.0), 1, true) );
//    moves.push_back( new ScaleMove(q1_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q2_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q3_value, 1.0, true, 2.0) );
//    moves.push_back( new ScaleMove(q4_value, 1.0, true, 2.0) );
    
    // add some tree stats to monitor
    DeterministicNode<double> *treeHeight = new DeterministicNode<double>("TreeHeight", new TreeHeightStatistic(tau) );
    
    /* add the monitors */
    RbVector<Monitor> monitors;
    std::set<DagNode*> monitoredNodes;
//    monitoredNodes.insert( er );
//    monitoredNodes.insert( pi );
//    monitoredNodes.insert( q );
//    monitoredNodes.insert( q1_value );
//    monitoredNodes.insert( q2_value );
//    monitoredNodes.insert( q3_value );
//    monitoredNodes.insert( q4_value );
    monitoredNodes.insert( site_rates_norm );
    monitoredNodes.insert( alpha );
    monitoredNodes.insert( treeHeight );
    monitors.push_back( new FileMonitor( monitoredNodes, 1000, "TestGtrGammaModelSubstRates.log", "\t" ) );
    monitors.push_back( new ScreenMonitor( monitoredNodes, 1000, "\t" ) );
    std::set<DagNode*> monitoredNodes2;
    monitoredNodes2.insert( tau );
    monitors.push_back( new FileMonitor( monitoredNodes2, 1000, "TestGtrGammaModel.tree", "\t", false, false, false ) );
    
    /* instantiate the model */
    Model myModel = Model(q);
    
    /* instiate and run the MCMC */
    Mcmc myMcmc = Mcmc( myModel, moves, monitors );
    myMcmc.run(mcmcGenerations);
    
    myMcmc.printOperatorSummary();
    
    /* clean up */
    //    for (size_t i = 0; i < 10; ++i) {
    //        delete x[i];
    //    }
    //    delete [] x;
    delete div;
    //    delete sigma;
    //    delete a;
    //    delete b;
    //    delete c;
    
    std::cout << "Finished GTR+Gamma model test." << std::endl;
    
    return true;
}