Ejemplo n.º 1
0
/*----------------------------------------------------------------------------------------------------------------------
|	Returns the number of univents for site `site'. Assumes `is_valid' is true and `site' is less than the length of the
|	`univents' vector.
*/
unsigned Univents::getNumEvents(
  unsigned site) const		/**< is the site of interest */
	{
	PHYCAS_ASSERT(this->is_valid);
	PHYCAS_ASSERT(site < univents.size());
	return (unsigned)univents.at(site).size();
	}
Ejemplo n.º 2
0
/*----------------------------------------------------------------------------------------------------------------------
|	Adds data currently stored in `sim_pattern_map' to the patterns already in `other'. The value of `mult' is used to
|	modify the counts before they are added to `other'; that is, the count of each pattern added to `other' is the 
|	original count multiplied by `mult'. Normally, `mult' would be specified to be 1.0, but in some cases it is
|	necessary to build up SimData objects that represent averages of other SimData objects, and it is these situations
|	where `mult' is handy. Assumes that `mult' is positive and non-zero. Assumes that `pattern_length' for this SimData
|	object is identical to the `pattern_length' of `other'.
*/
void SimData::addDataTo(
  SimData & other, 			/**< is the SimData object that will receive the data currently contained in this SimData object */
  pattern_count_t mult)		/**< is the factor multiplied by each pattern's count before pattern is stored in `other' */
	{
	// NOTE: everything between #if and #endif is compiled only when the macro
	// DISABLED_UNTIL_SIMULATION_WORKING_WITH_PARTITIONING evaluates to non-zero. When it is undefined (or zero)
	// this function has an empty body: it returns immediately and neither `other' nor `mult' is used.
#if DISABLED_UNTIL_SIMULATION_WORKING_WITH_PARTITIONING
	PHYCAS_ASSERT(mult > 0.0);

	// If this object has no patterns, return immediately
	if (total_count == 0.0)
		return;

	// If other is empty, then it most likely needs to be initialized
	if (other.getTotalCount() == 0)
		{
		other.resetPatternLength(pattern_length);
		}
	// The copy below writes into other's workspace without resizing it, so the pattern lengths must agree
	PHYCAS_ASSERT(pattern_length == other.getPatternLength());

	// Visit every (pattern, count) pair stored in this object
	for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it)
		{
		pattern_count_t count = it->second;
		
		// GetCurrPattern returns a workspace for building up a pattern to be added to other
		int8_vect_t & other_pattern = other.getCurrPattern();

		// Copy pattern represented by *it to other's workspace
		std::copy(it->first.begin(), it->first.end(), other_pattern.begin());

		// Add the pattern in other's workspace to other's pattern map, weighting its count by mult
		pattern_count_t mult_count = mult*count;
		other.insertPattern(mult_count);
		}
#endif
	}
Ejemplo n.º 3
0
/*----------------------------------------------------------------------------------------------------------------------
|	Sets the data members `num_patterns', `num_rates' and `num_states', which determine the dimensions of all 
|	CondLikelihood objects stored. If the `cl_stack' is not currently empty and if the new conditional likelihood array
|	length is greater than the current length, all existing objects in `cl_stack' are deleted so that CLAs supplied to
|	the tree in the future will be at least the minimum length needed. Because it is critical that CLAs already checked
|	out to a tree be removed if the new length is longer than the old length (otherwise, CLAs that are too short will 
|	be used in the future), this function also checks to make sure there are no CLAs currently checked out. If there are
|	checked out CLAs, an XLikelihood exception is thrown. This function has no effect unless the supplied arguments
|	imply that newly-created CLAs will be longer than the existing ones. It is somewhat wasteful to leave in CLAs that
|	are longer than they need to be, but perhaps more wasteful to continually recall and delete all existing CLAs just
|	to ensure that their length is exactly correct.
*/
void CondLikelihoodStorage::setCondLikeDimensions(const uint_vect_t & np, const uint_vect_t & nr, const uint_vect_t & ns)
	{
	// np, nr and ns are parallel vectors (one entry per partition subset), so they must have equal length
	const unsigned sz = (unsigned)np.size();
	PHYCAS_ASSERT(nr.size() == sz);
	PHYCAS_ASSERT(ns.size() == sz);

	// Total CLA length implied by the requested dimensions
	unsigned newlen = 0;
	for (unsigned i = 0; i < sz; ++i)
		{
		PHYCAS_ASSERT(np[i] > 0);
		PHYCAS_ASSERT(nr[i] > 0);
		PHYCAS_ASSERT(ns[i] > 0);
		newlen += np[i]*nr[i]*ns[i];
		}

	// Total CLA length implied by the dimensions currently stored. The stored vectors are walked over their
	// own length, not over `sz': the number of partition subsets may differ between the old and new dimensions,
	// and indexing the stored vectors with the new range would read past their end (new has more subsets) or
	// undercount the old length (new has fewer subsets).
	unsigned oldlen = 0;
	const bool have_old = !num_patterns.empty() && !num_rates.empty() && !num_states.empty();
	if (have_old)
		{
		const unsigned old_sz = (unsigned)num_patterns.size();
		PHYCAS_ASSERT(num_rates.size() == old_sz);
		PHYCAS_ASSERT(num_states.size() == old_sz);
		for (unsigned i = 0; i < old_sz; ++i)
			oldlen += num_patterns[i]*num_rates[i]*num_states[i];
		}

	// Only act when CLAs built from the new dimensions would be longer than those built from the stored ones
	if (newlen > oldlen)
		{
		clearStack();
		num_patterns.assign(np.begin(), np.end());
		num_rates.assign(nr.begin(), nr.end());
		num_states.assign(ns.begin(), ns.end());
		}
	}
Ejemplo n.º 4
0
/*----------------------------------------------------------------------------------------------------------------------
|   Randomly chooses a node to serve as node Z (the bottom of the three nodes involved in a Larget-Simon move). The
|   supplied node `middle' is the node serving as Y. In the figure below, the nodes labeled Z are all possible
|   candidates for the return value of this function. The node selected as Z should be the owner of the lowermost edge
|   involved in the move (X owns the uppermost edge and Y owns the middle edge).
|>
|	     X  X  X
|	      \ | /
|          \|/
|	  Z  Z  Y
|	   \ | /
|	    \|/
|	     Z
|	     |
|>
*/
TreeNode * LargetSimonMove::chooseZ(
  TreeNode * middle)    /**< is the middle node (Y) */
    {
	// U is the node directly below Y; every candidate for Z is either U itself or a sibling of Y
	TreeNode * U = middle->GetParent();
	PHYCAS_ASSERT(U != NULL);

	// A draw of 0 stands for the edge below U (owned by U); any other value selects one of U's
	// children other than `middle', counted from the left starting at 1
	const unsigned pick = rng->SampleUInt(U->CountChildren());
	if (pick == 0)
		return U;

	// Walk U's children left to right, counting down once for each child that is not `middle'
	unsigned remaining = pick;
	TreeNode * candidate = U->GetLeftChild();
	while (candidate != NULL)
		{
		if (candidate != middle && --remaining == 0)
			break;
		candidate = candidate->GetRightSib();
		}

	// Running off the end of the sibling list would mean `pick' exceeded the number of eligible children
	PHYCAS_ASSERT(candidate != NULL);
	return candidate;
    }
Ejemplo n.º 5
0
/*--------------------------------------------------------------------------------------------------------------------------
|	Called if the move is accepted.
*/
void LargetSimonMove::accept()
	{
	// Let the base class do its own acceptance bookkeeping first
	MCMCUpdater::accept();

	if (!star_tree_proposal)
		{
		// Ordinary proposal: the cached likelihood information is discarded around ndY
		PHYCAS_ASSERT(ndY->IsInternal());
		if (!likelihood->getNoData())
			{
			likelihood->useAsLikelihoodRoot(ndY);
			likelihood->discardCacheAwayFromNode(*ndY);
			likelihood->discardCacheBothEnds(ndY);
			}

		// The three nodes involved in the move no longer need to be marked as selected
		ndX->UnselectNode();
		ndY->UnselectNode();
		ndZ->UnselectNode();
		}
	else
		{
		// Star tree proposal: only orig_node was involved. The likelihood root must be an internal
		// node, so use orig_node's parent when orig_node itself is a tip.
		TreeNode * nd = orig_node;
		if (orig_node->IsTip())
			nd = orig_node->GetParent();
		PHYCAS_ASSERT(nd->IsInternal());

		if (!likelihood->getNoData())
			{
			likelihood->useAsLikelihoodRoot(nd);
			likelihood->discardCacheAwayFromNode(*orig_node);
			likelihood->discardCacheBothEnds(orig_node);
			}

		orig_node->UnselectNode();
		}

	reset();
	}
Ejemplo n.º 6
0
/*----------------------------------------------------------------------------------------------------------------------
|	Copies the end states in the supplied vector `p' to the data member `end_states_vec'. This function sets 'is_valid'
|   to false because changing the end states invalidates any univents currently mapped.
*/
void Univents::setEndStates(
  const int8_t * p)  /**< is the vector of end states to be copied to `end_states_vec' */
    {
	PHYCAS_ASSERT(p);

	// New end states invalidate whatever univents are currently mapped
	setValid(false);   //@POL shouldn't we also set times_valid to false here?

	// The number of states copied is dictated by the current length of `end_states_vec'
	const unsigned n = static_cast<unsigned>(end_states_vec.size());
	PHYCAS_ASSERT(n > 0);

	//@POL should assert that p is long enough for this
	unsigned k = 0;
	while (k < n)
		{
		end_states_vec[k] = p[k];
		++k;
		}
    }
Ejemplo n.º 7
0
/*----------------------------------------------------------------------------------------------------------------------
|	Constructor calls the base class (MCMCUpdater) constructor and initializes its GTR pointer to NULL. Also sets the
|	`curr_value' data member to 1.0 and refreshes `curr_ln_prior' accordingly. Assumes `w' is greater than or equal to
|	zero and less than 6.
*/
GTRRateParam::GTRRateParam(
  unsigned w)		/**< The 0-based index of the relative rate being managed by this object (0=AC, 1=AG, 2=AT, 3=CG, 4=CT and 5=GT) */
  : MCMCUpdater(), gtr(NULL), which(w)
	{
	// `w' is unsigned, so only the upper bound needs checking (a `w >= 0' test is always true
	// and merely provokes tautological-comparison warnings)
	PHYCAS_ASSERT(w < 6);

	// The relative rate starts out at 1.0
	curr_value = 1.0;

	// Flags describing what kind of updater this is
	has_slice_sampler = true;
	is_move = false;
	is_master_param = false;
	is_hyper_param = false;
	}
Ejemplo n.º 8
0
/*----------------------------------------------------------------------------------------------------------------------
|	Returns the natural logarithm of the product of the terms on the main diagonal of this SquareMatrix. If this matrix
|   is triangular, this is equal to the log of the determinant.
*/
double SquareMatrix::LogProdMainDiag() const
    {
	PHYCAS_ASSERT(dim > 0);

	// log(a*b*c...) = log(a) + log(b) + log(c) + ..., so accumulate the logs of the diagonal elements
	double log_product = 0.0;
	unsigned k = 0;
	while (k < dim)
		{
		const double diag = GetElement(k, k);

		// The log is only defined for strictly positive diagonal elements
		PHYCAS_ASSERT(diag > 0.0);
		log_product += log(diag);
		++k;
		}
	return log_product;
    }
Ejemplo n.º 9
0
/*----------------------------------------------------------------------------------------------------------------------
|	Ensures that the `cl_stack' contains at least `capacity' CondLikelihoodShPtr objects.
*/
void CondLikelihoodStorage::fillTo(unsigned capacity)
	{
	PHYCAS_ASSERT(std::accumulate(num_patterns.begin(), num_patterns.end(), 0) > 0);
	PHYCAS_ASSERT(std::accumulate(num_rates.begin(), num_rates.end(), 0) > 0);
	PHYCAS_ASSERT(std::accumulate(num_states.begin(), num_states.end(), 0) > 0);
	unsigned curr_sz = (unsigned)cl_stack.size();
	unsigned num_needed = (capacity > curr_sz ? capacity - curr_sz : 0);
	for (unsigned i = 0; i < num_needed; ++i)
		{
		cl_stack.push(CondLikelihoodShPtr(new CondLikelihood(num_patterns, num_rates, num_states)));
		num_created++;
		}
	}
Ejemplo n.º 10
0
/*----------------------------------------------------------------------------------------------------------------------
|	Returns number of bytes allocated for each CLA. This equals sizeof(LikeFltType) times the product of the number of
|	patterns, number of rates and number of states, summed over all partition subsets.
*/
unsigned CondLikelihoodStorage::bytesPerCLA() const
	{
	// The three dimension vectors are parallel: one entry per partition subset
	const unsigned nsubsets = static_cast<unsigned>(num_patterns.size());
	PHYCAS_ASSERT(num_rates.size() == nsubsets);
	PHYCAS_ASSERT(num_states.size() == nsubsets);

	// Each subset contributes patterns*rates*states elements of sizeof(LikeFltType) bytes each
	unsigned nbytes = 0;
	unsigned k = 0;
	while (k < nsubsets)
		{
		nbytes += (unsigned)(num_patterns[k]*num_rates[k]*num_states[k]*sizeof(LikeFltType));
		++k;
		}
	return nbytes;
	}
Ejemplo n.º 11
0
/*----------------------------------------------------------------------------------------------------------------------
|	Adds data currently stored in `sim_pattern_map' to the patterns already in `other'. The value of `mult' is used to
|	modify the counts before they are added to `other'; that is, the count of each pattern added to `other' is the 
|	original count multiplied by `mult'. Normally, `mult' would be specified to be 1.0, but in some cases it is
|	necessary to build up SimData objects that represent averages of other SimData objects, and it is these situations
|	where `mult' is handy. Assumes that `mult' is positive and non-zero. Assumes that `pattern_length' for this SimData
|	object is identical to the `pattern_length' of `other'. This function maintains a running average. It depends on 
|	'other' keeping track of how many times it has received new data (the 'num_additions' data member is used for this).
|	
|	Here is an example of how the running average works. Suppose there are only two possible patterns, and the following 
|	pairs of pattern counts are generated by performing three posterior predictive simulations:
|>
|	[x1, y1] are the counts for the two patterns in posterior predictive simulated dataset number 1
|	[x2, y2] are the counts for the two patterns in posterior predictive simulated dataset number 2
|	[x3, y3] are the counts for the two patterns in posterior predictive simulated dataset number 3
|>
|	In each case, the sum of x and y is n, so total_count is 3n after all three have been added. The strategy before 
|	would be to add the x values and y values, then divide by 3 at the end, so the average dataset would be:
|>
|	[(x1+x2+x3)/3,  (y1+y2+y3)/3]
|>
|	Waiting to do the division until the end can lead to overflow (see the first entry in the BUGS file for details), 
|	however, so instead this function maintains a running average as follows:
|>
|	After adding pair 1:  p = 1/1, 1-p = 0
|	                      x = x*(1-p) + x1*p = x1
|	                      y = y*(1-p) + y1*p = y1
|	                      total_count = total_count*(1-p) + (x1 + y1)*p = x1 + y1
|	After adding pair 2:  p = 1/2, 1-p = 1/2
|	                      x = x*(1-p) + x2*p = (x1 + x2)/2
|	                      y = y*(1-p) + y2*p = (y1 + y2)/2 
|	                      total_count = total_count*(1-p) + (x2 + y2)*p = (x1 + x2 + y1 + y2)/2
|	After adding pair 3:  p = 1/3, 1-p = 2/3
|	                      x = x*(1-p) + x3*p = (x1 + x2 + x3)/3
|	                      y = y*(1-p) + y3*p = (y1 + y2 + y3)/3 
|	                      total_count = total_count*(1-p) + (x3 + y3)*p = (x1 + x2 + x3 + y1 + y2 + y3)/3
|>
*/
void SimData::addToRunningAverage(
  SimData & other, 			/**< is the SimData object holding the running average that receives this object's patterns */
  PatternCountType mult)	/**< is the factor multiplied by each pattern's count before it is averaged into `other' */
	{
	PHYCAS_ASSERT(mult > 0.0);
	PHYCAS_ASSERT(total_count > 0.0);

	// If other is empty, then it most likely needs to be initialized
	if (other.getTotalCount() == 0)
		{
		other.resetPatternLength(pattern_length);
		}

	// Patterns are copied into other's workspace without resizing it, so the lengths must agree
	PHYCAS_ASSERT(pattern_length == other.getPatternLength());

	// calculate p, the weight to be used for this addition: p = 1/(1 + number of previous additions)
	unsigned nadd = other.getNumAdditions();
	PatternCountType numer = (PatternCountType)1.0;
	PatternCountType denom = (PatternCountType)(1 + nadd);
	PatternCountType p = numer/denom;

	// NOTE(review): only patterns present in this object are passed to other.insertPatternToRunningAverage.
	// Whether patterns that exist only in `other' also get rescaled by (1 - p) is not visible from here;
	// confirm before relying on the per-pattern averages.
	PatternCountType sum = 0.0;
	for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it)
		{
		PatternCountType count = it->second;

		// GetCurrPattern returns a workspace for building up a pattern to be added to other
		int8_vect_t & other_pattern = other.getCurrPattern();

		// Copy pattern represented by *it to other's workspace
		std::copy(it->first.begin(), it->first.end(), other_pattern.begin());

		// Add the pattern now in other's tmp_pattern workspace to other's pattern map
		PatternCountType mult_count = mult*count;
		sum += mult_count;
		other.insertPatternToRunningAverage(mult_count, p);
		}

	// Update other's total_count with the same weighting: total*(1 - p) + (sum of weighted counts)*p
	PatternCountType curr_total = other.getTotalCount();
	PatternCountType one_minus_p = (PatternCountType)(1.0 - p);
	PatternCountType new_total = curr_total*one_minus_p + sum*p;
	other.setTotalCount(new_total);

	// Record that `other' has received one more dataset so the next call uses the right weight.
	// (No early process termination here: the function must be callable any number of times.)
	other.setNumAdditions(nadd + 1);
	}
Ejemplo n.º 12
0
/*----------------------------------------------------------------------------------------------------------------------
|	Returns simulated data stored in `sim_pattern_map' as a string in the form of a two-column table. The first column
|	is labeled "Count" and the second is labeled "Pattern". The Count column shows the number of times its associated 
|	pattern was inserted using the insertPattern function. The Pattern column shows a representation of the pattern 
|	itself, using symbols for states provided in the `state_symbols' argument. The `state_symbols' argument should be 
|	a vector of single-character strings supplying a symbol to represent each state that might show up in any pattern.
|	Assumes that no state in any pattern stored in `sim_pattern_map' is greater than or equal to the length of the 
|	`state_symbols' vector (because states are used as indices into `state_symbols').
*/
std::string SimData::patternTable(
  const StringVect & state_symbols) /**< is a vector of strings representing states (e.g. {"A", "C", "G", "T"}). Note that each state symbol should be a string of length 1 (i.e. a single character) */	
	{
	PHYCAS_ASSERT(state_symbols.size() > 0);

	outstr.clear();

	if (sim_pattern_map.empty())
		{
		outstr = "Sorry, no patterns are stored";
		}
	else
		{
		outstr = "     Count  Pattern";

		for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it)
			{
			// Output the count first
			outstr << str(boost::format("\n%10.1f") % it->second) << "  ";

			// Now output the pattern
			std::transform(it->first.begin(), it->first.end(), std::back_inserter(outstr), LookupStateSymbol(state_symbols));
			}
		}
	return outstr;
	}
Ejemplo n.º 13
0
/*----------------------------------------------------------------------------------------------------------------------
|	Folds the pattern currently held in `tmp_pattern' into `sim_pattern_map' as one step of a running average. If the
|	pattern is already stored, its count c is replaced by c*(1 - p) + count*p; otherwise the pattern is inserted with
|	count*p as its value (the same formula with c = 0).
|	
|	Important! Unlike insertPattern, this function does not update total_count to reflect the new sum of pattern counts
|	over all patterns. The calling routine is responsible for updating total_count.
|	
|	NOTE(review): this function leaves `tmp_pattern' as it found it (it does not refill it with `missing_state'), so the
|	caller must overwrite every element of `tmp_pattern' before the next insertion.
*/
void SimData::insertPatternToRunningAverage(
  pattern_count_t count,	/**< is the number of times this pattern was seen */
  pattern_count_t p)		/**< is the inverse of the number of datasets added */
	{
	// In debug build, check to make sure there are no elements in tmp_pattern that still equal
	// missing_state (if assert trips, it means that not all elements of tmp_pattern have been 
	// replaced with actual states)
	PHYCAS_ASSERT(std::find(tmp_pattern.begin(), tmp_pattern.end(), missing_state) == tmp_pattern.end());

	// Look the pattern up once with lower_bound so that the resulting iterator can serve either as the
	// element to modify or as the insertion hint (see item 24, p. 110, in Meyers' Effective STL)
	pattern_map_t::iterator lowb = sim_pattern_map.lower_bound(tmp_pattern);
	if (lowb != sim_pattern_map.end() && !(sim_pattern_map.key_comp()(tmp_pattern, lowb->first)))
		{
		// Pattern is already in sim_pattern_map, so just blend the new count into the stored one
		pattern_count_t curr = lowb->second;
		pattern_count_t one_minus_p = (pattern_count_t)(1.0 - p);
		lowb->second = curr*one_minus_p + count*p;
		}
	else
		{
		// tmp_pattern has not yet been stored in sim_pattern_map; its previous count is implicitly zero
		sim_pattern_map.insert(lowb, pattern_map_t::value_type(tmp_pattern, count*p));
		}
	}
Ejemplo n.º 14
0
/*----------------------------------------------------------------------------------------------------------------------
|	Fills the supplied `tipSpecificStateCode' array with the elements of the `end_states_vec' vector. Before calling 
|   this function, ensure that `tipSpecificStateCode' is long enough (this assumption is not checked in this function).
*/
void Univents::fillStateCodeArray(int8_t * tipSpecificStateCode) const
	{
	// Exactly as many elements are written as `end_states_vec' currently holds
	const unsigned n = static_cast<unsigned>(end_states_vec.size());
	PHYCAS_ASSERT(n > 0);

	// Copy element by element; the caller guarantees the destination is long enough
	int8_t * dest = tipSpecificStateCode;
	for (unsigned k = 0; k != n; ++k, ++dest)
		*dest = end_states_vec[k];
	}
Ejemplo n.º 15
0
/*----------------------------------------------------------------------------------------------------------------------
|	Returns a vector of univent times for site `site'. Assumes `times_valid' is true and `site' is less than the length 
|	of the `times' vector. This function is not particularly efficient, and it is intended primarily for transferring
|	univent times to Python code for debugging purposes.
*/
std::vector<double> Univents::getTimes(
  unsigned site) const		/**< is the site of interest */
	{
	// The stored times are only meaningful while flagged as valid
	PHYCAS_ASSERT(times_valid);

	// Hand back a copy of the times recorded for the requested site
	return std::vector<double>(times.at(site));
	}
Ejemplo n.º 16
0
/*----------------------------------------------------------------------------------------------------------------------
|	Computes the joint log working prior over all edges in the associated tree.
*/
double TreeScalerMove::recalcWorkingPriorForMove(
  bool using_tree_length_prior,         /*< true if using the Ranalla-Yang tree length prior */
  bool using_vartopol_prior) const     /*< true if using the Holder et al. variable topology reference distribution */
	{
    double ln_ref_dist = 0.0;
    if (using_tree_length_prior)
        {
        ln_ref_dist = likelihood->getTreeLengthRefDist()->GetLnPDF(tree);
        }
    else if (using_vartopol_prior)
        {
        // Computes the log of the probability of the tree under Mark Holder's variable tree topology reference distribution
        PHYCAS_ASSERT(topo_prob_calc);
        std::pair<double, double> treeprobs = topo_prob_calc->CalcTopologyLnProb(*tree, true);
        const double ln_ref_topo = treeprobs.first;
        const double ln_ref_edges = treeprobs.second;
        ln_ref_dist = ln_ref_topo + ln_ref_edges;
        }
    else
        {
        // Loop through all EdgeLenMasterParam objects and call the recalcWorkingPrior function of each.
        // Each EdgeLenMasterParam object knows how to compute the working prior for the edge lengths it controls.
        ChainManagerShPtr p = chain_mgr.lock();
        const MCMCUpdaterVect & edge_length_params = p->getEdgeLenParams();
        for (MCMCUpdaterVect::const_iterator it = edge_length_params.begin(); it != edge_length_params.end(); ++it)
            {
            if (!(*it)->isFixed())
                ln_ref_dist += (*it)->recalcWorkingPrior();
            }
        }

	return ln_ref_dist;
	}
Ejemplo n.º 17
0
/*----------------------------------------------------------------------------------------------------------------------
|	Sets `tmp_pattern'[`pos'] to the supplied `state'.
*/
void SimData::setState(
  unsigned pos, 	/**< is the position in `tmp_pattern' to set */
  int8_t state)		/**< is the state to assign to the element in `tmp_pattern' at position pos */
    {
    // Debug-build check that the position lies inside the pattern
    PHYCAS_ASSERT(pos < pattern_length);

    // Overwrite the element at offset `pos' of the pattern workspace
    *(tmp_pattern.begin() + pos) = state;
    }
Ejemplo n.º 18
0
/*----------------------------------------------------------------------------------------------------------------------
|	Use samples in `fitting_sample' to parameterize a new BetaDistribution, which is then stored in `ref_dist'. 
|	Assumes `fitting_sample' has more than 1 element. 
*/
void MCMCUpdater::fitBetaWorkingPrior()
	{
	if (!isFixed())
		{
		PHYCAS_ASSERT(fitting_sample.size() > 1);
		double n = (double)fitting_sample.size();
		double sum = 0.0;
		double sum_of_squares = 0.0;
		for (double_vect_t::iterator i = fitting_sample.begin(); i != fitting_sample.end(); ++i)
			{
			double v = (*i);
			sum += v;
			sum_of_squares += v*v;
			}
		double mean = sum/n;
		double variance = (sum_of_squares - n*mean*mean)/(n - 1.0);

		// Let a, b be the parameters of a Beta(a,b) and let phi = a + b
		// Note that:
		//     mean = a/phi
		//   1-mean = b/phi
		// variance = a*b/[phi^2*(phi + 1)]
		// Letting z = mean*(1-mean)/variance,
		// phi can be estimated as z - 1
		// Now, a = mean*phi and b = (1-mean)*phi
		double phi = mean*(1.0-mean)/variance - 1.0;
		double a = phi*mean;
		double b = phi*(1.0 - mean);
		ref_dist = ProbDistShPtr(new BetaDistribution(a, b));
		// std::cerr << boost::str(boost::format("@@@@@@@@@ working prior is Beta(%g, %g) for updater %s: mean = %g, variance = %g") % a % b % getName() % mean % variance) << std::endl;
		}
	}
Ejemplo n.º 19
0
unsigned CondLikelihood::calcCLALength(
  const uint_vect_t & npatterns, 	/**< is a vector containing the number of data patterns for each partition subset */
  const uint_vect_t & nrates, 		/**< is a vector containing the number of among-site relative rate categories for each partition subset */
  const uint_vect_t & nstates) 		/**< is a vector containing the number of states for each partition subset */
	{
	// The three vectors are parallel, with one entry per partition subset
	const unsigned nsubsets = static_cast<unsigned>(npatterns.size());
	PHYCAS_ASSERT(nrates.size() == nsubsets);
	PHYCAS_ASSERT(nstates.size() == nsubsets);

	// Sum patterns*rates*states over all subsets
	unsigned length = 0;
	unsigned k = 0;
	while (k < nsubsets)
		{
		length += (npatterns[k]*nrates[k]*nstates[k]);
		++k;
		}
	return length;
	}
Ejemplo n.º 20
0
inline ConstCondLikelihoodShPtr InternalData::getValidParentalCondLikePtr() const
	{
	// An earlier approach (assert parCLAValid, then call the non-const getParentalCondLikePtr through a
	// const_cast) has been replaced by direct use of the working CLA pointer.

	// The working parental CLA must point to something before it can be handed out
	PHYCAS_ASSERT(parWorkingCLA);

	// Hand it back through a pointer-to-const so the caller cannot modify the CLA
	ConstCondLikelihoodShPtr cla(parWorkingCLA);
	return cla;
	}
Ejemplo n.º 21
0
/*----------------------------------------------------------------------------------------------------------------------
|	Computes the log of the probability of the tree under Mark Holder's tree topology reference distribution.
*/
double LargetSimonMove::recalcWorkingPrior() const
	{
	PHYCAS_ASSERT(topo_prob_calc);

	// CalcTopologyLnProb returns a pair: first is the topology term, second is the edge length term
	const std::pair<double, double> ln_probs = topo_prob_calc->CalcTopologyLnProb(*tree, true);

	// The log reference density is the sum of the two terms
	return ln_probs.first + ln_probs.second;
	}
Ejemplo n.º 22
0
/*----------------------------------------------------------------------------------------------------------------------
|	Override of base class version adds the current GTR relative rate parameter value to the data already stored in 
|	`fitting_sample'.
*/
void GTRRateParam::educateWorkingPrior()
	{
	// Nothing is sampled for fixed parameters
	if (isFixed())
		return;

	// only prior stewards should be building working priors
	PHYCAS_ASSERT(isPriorSteward());

	// Append the current value, as obtained from the model, to the sample used for fitting
	fitting_sample.push_back(getCurrValueFromModel());
	}
Ejemplo n.º 23
0
/*----------------------------------------------------------------------------------------------------------------------
|   Builds up the `binv' vector by classifying stored site patterns into the following categories (bins):
|>
|	Count   Bin description                 
|   ----------------------------------------
|	n_0     all patterns containing only A  
|	n_1     all patterns containing only C  
|	n_2     all patterns containing only G  
|	n_3     all patterns containing only T  
|	n_4     patterns containing any 2 states
|	n_5     patterns containing any 3 states
|	n_6     patterns containing any 4 states
|   ----------------------------------------
|>
|	Warning: this function currently assumes no missing data! A missing state is treated as if it were one of the
|	other states in the pattern. For example, the count for the pattern 'AAACA?GAA' would be stuffed into the 3-state
|	bin, with the implicit assumption being that the ? equals either A, C or G.
*/
void SimData::buildBinVector(
  unsigned nstates)   /**< is the number of states */
	{
	// With nstates == 0 the unsigned expression 2*nstates - 1 wraps around to a huge value, so never size
	// binv from it: leave binv empty instead
	PHYCAS_ASSERT(nstates > 0);
	binv.clear();
	if (nstates == 0)
		return;

	// One constant-site bin per state (indices 0..nstates-1) followed by one bin for each possible number
	// of distinct states in a variable pattern (2..nstates)
	const unsigned nbins = 2*nstates - 1;
	binv.resize(nbins, 0.0);

	std::set<int8_t> state_set;
	// pattern_map_t associates int8_vect_t keys (pattern) with double values (count)
	for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it)
		{
		const int8_vect_t & p = it->first;

		// For this pattern, count distinct states by creating a set
		state_set.clear();
		for (int8_vect_t::const_iterator pit = p.begin(); pit != p.end(); ++pit)
			{
			const int cs = (int)(*pit);
			if (cs >= 0 && cs < (int)nstates)
				{
				// state is not a gap (-1), missing (nstates), or an ambiguity code (> nstates), so add to set
				state_set.insert(*pit);
				}
			}
		const unsigned sz = (unsigned)state_set.size();
		PHYCAS_ASSERT(sz > 0);
		PHYCAS_ASSERT(sz <= nstates);

		// A pattern with no unambiguous state at all fits none of the bins. When assertions are compiled
		// out, skip it rather than let nstates + sz - 2 select an unrelated bin (or wrap around when
		// nstates is 1).
		if (sz == 0)
			continue;

		const double this_count = (double)(it->second);
		if (sz == 1)
			{
			// pattern had only one state, so add pattern count to that state's constant site bin
			binv[(unsigned)(*state_set.begin())] += this_count;
			}
		else
			{
			// pattern had sz states, so add pattern count to appropriate variable site bin
			binv[nstates + sz - 2] += this_count;
			}
		}
    }
Ejemplo n.º 24
0
/*----------------------------------------------------------------------------------------------------------------------
|	Calls GetLnPDF function of prior to recalculate `curr_ln_prior'. This function is important because the 
|	tempting getLnPrior() member function only returns the value of `curr_ln_prior' (it does not recalculate anything).
*/
double MCMCUpdater::recalcPrior()
	{
	// Many moves will not have a prior assigned to them. In these cases, just return 0.0,
	// which will have no effect on the cumulative log-prior.
	if (!prior && !mv_prior)
		return 0.0;

	if (!prior)
		{
		// Only the multivariate prior is present: evaluate it at the current vector of values
		double_vect_t values;
		getCurrValuesFromModel(values);
		try
			{
			curr_ln_prior = mv_prior->GetLnPDF(values);
			}
		catch(XProbDist &)
			{
			// Trips in debug builds; otherwise curr_ln_prior keeps its previous value
			PHYCAS_ASSERT(0);
			}
		}
	else
		{
		// A univariate prior is present and takes precedence: evaluate it at the current value
		const double value = getCurrValueFromModel();
		try
			{
			curr_ln_prior = prior->GetLnPDF(value);
			}
		catch(XProbDist &)
			{
			// Trips in debug builds; otherwise curr_ln_prior keeps its previous value
			PHYCAS_ASSERT(0);
			}
		}

	return curr_ln_prior;
	}
Ejemplo n.º 25
0
inline ConstCondLikelihoodShPtr InternalData::getValidChildCondLikePtr() const
	{
	//@POL-NESCENT open question left for Mark about the earlier approach (assert childCLAValid, then call
	// the non-const getChildCondLikePtr through a const_cast): why not simply assert that childWorkingCLA
	// points to a CondLikelihood object and return it directly? That is what the code below does.

	// The working child CLA must point to something before it can be handed out
	PHYCAS_ASSERT(childWorkingCLA);

	// Hand it back through a pointer-to-const so the caller cannot modify the CLA
	ConstCondLikelihoodShPtr cla(childWorkingCLA);
	return cla;
	}
Ejemplo n.º 26
0
/*----------------------------------------------------------------------------------------------------------------------
|	Chooses a random edge and changes its current length m to a new length m* using the following formula, where `lambda' is
|	a tuning parameter.
|>
|	m* = m*exp(lambda*(r.Uniform() - 0.5))
|>
*/
void LargetSimonMove::starTreeProposeNewState()
	{
	// Every node except the root owns one edge, so draw an edge index k for a tree of GetNNodes() - 1 edges
	const unsigned numEdges = tree->GetNNodes() - 1;
	const unsigned k = rng->SampleUInt(numEdges);

	// Walk the tree in preorder, counting only nodes that own an edge, until the k-th such node is reached.
	// The walk uses the `orig_node' data member directly, so the chosen node is remembered after the loop.
	//@POL this loop is crying out for the for_each algorithm
	unsigned edges_seen = 0;
	orig_node = tree->GetFirstPreorder();
	while (orig_node != NULL)
		{
		if (!orig_node->IsTipRoot())
			{
			if (edges_seen == k)
				{
				// Remember the length of the chosen edge before it is changed
				orig_edge_len = orig_node->GetEdgeLen();
				break;
				}
			++edges_seen;
			}
		orig_node = orig_node->GetNextPreorder();
		}

	// Rescale the chosen edge: m* = m*exp(lambda*(u - 0.5)), with u drawn from rng->Uniform()
	const double m = orig_node->GetEdgeLen();
	const double mstar = m*std::exp(lambda*(rng->Uniform() - 0.5));
	orig_node->SetEdgeLen(mstar);

	// Invalidate CLAs to ensure next likelihood calculation will be correct. The likelihood root must be
	// an internal node, so use the parent when the chosen node is a tip.
	orig_node->SelectNode();
	TreeNode * nd = orig_node;
	if (orig_node->IsTip())
		nd = orig_node->GetParent();
	PHYCAS_ASSERT(nd->IsInternal());
	likelihood->useAsLikelihoodRoot(nd);
	likelihood->invalidateAwayFromNode(*orig_node);
	likelihood->invalidateBothEnds(orig_node);

	// Let the joint prior manager know that edge lengths have changed
	ChainManagerShPtr p = chain_mgr.lock();
	PHYCAS_ASSERT(p);
	JointPriorManagerShPtr jpm = p->getJointPriorManager();
	jpm->allEdgeLensModified(tree);
	//jpm->externalEdgeLensModified("external_edgelen", tree);
	}
Ejemplo n.º 27
0
/*----------------------------------------------------------------------------------------------------------------------
|	Assumes dimension > 0 and `m' exists. Subtracts each element of `other' from the corresponding element of this.
*/
void SquareMatrix::Subtract(
  const SquareMatrix & other)	 /**< is the matrix to subtract from this */
	{
	PHYCAS_ASSERT(dim > 0);
	unsigned last = dim*dim;
	double * otherp = &other.m[0][0];
	double * p = &m[0][0];
	for (unsigned i = 0; i < last; ++i)
		*p++ -= *otherp++;
	}
Ejemplo n.º 28
0
/*----------------------------------------------------------------------------------------------------------------------
|	Assumes dimension > 0 and `m' exists. Sets each element of `m' to the supplied `scalar'.
*/
void SquareMatrix::SetToScalar(
  double scalar)	/**< is the scalar value to which each element is set */
	{
	PHYCAS_ASSERT(dim > 0);

	// Treat the elements as one run of dim*dim doubles starting at m[0][0] and overwrite each in turn
	const unsigned count = dim*dim;
	double * cell = &m[0][0];
	double * const stop = cell + count;
	while (cell != stop)
		*cell++ = scalar;
	}
Ejemplo n.º 29
0
/*----------------------------------------------------------------------------------------------------------------------
|	Assumes dimension > 0 and `m' exists. Multiplies each element of `m' by the supplied `scalar'.
*/
void SquareMatrix::ScalarMultiply(
  double scalar)	/**< is the scalar value multiplied by each element */
	{
	PHYCAS_ASSERT(dim > 0);

	// Treat the elements as one run of dim*dim doubles starting at m[0][0] and scale each in turn
	const unsigned count = dim*dim;
	double * cell = &m[0][0];
	double * const stop = cell + count;
	while (cell != stop)
		*cell++ *= scalar;
	}
Ejemplo n.º 30
0
/*----------------------------------------------------------------------------------------------------------------------
|	This creates a SliceSampler object and assigns it to the `slice_sampler' data member, which is a shared_ptr that 
|	points to nothing initially. The SliceSampler constructor takes a boost::shared_ptr<Lot> (which we have available)  
|	and a FuncToSampleWkPtr (this object). Thus, the SliceSampler cannot be created in the constructor because "this" does
|	not yet fully exist, hence the need for a createSliceSampler() member function, which needs to be called at some 
|	point after the MCMCUpdater-derived object is created and of course before its `slice_sampler' data member starts 
|	being used.
*/
void MCMCUpdater::createSliceSampler() 
	{
	PHYCAS_ASSERT(!slice_sampler);	// don't want to do this more than once
#if defined(WEAK_FUNCTOSAMPLE)
	slice_sampler.reset(new SliceSampler(rng, FuncToSampleWkPtr(shared_from_this()))); // forces inclusion of "phycas/src/slice_sampler.hpp"
#else
	slice_sampler.reset(new SliceSampler(rng, shared_from_this())); // forces inclusion of "phycas/src/slice_sampler.hpp"
#endif
	slice_sampler->SetMaxUnits(slice_max_units);
	slice_sampler->SetXValue(curr_value);
	}