/*----------------------------------------------------------------------------------------------------------------------
| Reports how many univents are stored for site `site'. In debug builds this asserts that `is_valid' is true and that
| `site' is a valid index into the `univents' vector; the element itself is fetched with the bounds-checked at().
*/
unsigned Univents::getNumEvents(
  unsigned site) const  /**< is the site of interest */
{
    PHYCAS_ASSERT(this->is_valid);
    PHYCAS_ASSERT(site < univents.size());

    // The container reports a size_type; callers expect a plain unsigned
    const unsigned nevents = (unsigned)univents.at(site).size();
    return nevents;
}
/*---------------------------------------------------------------------------------------------------------------------- | Adds data currently stored in `sim_pattern_map' to the patterns already in `other'. The value of `mult' is used to | modify the counts before they are added to `other'; that is, the count of each pattern added to `other' is the | original count multiplied by `mult'. Normally, `mult' would be specified to be 1.0, but in some cases it is | necessary to build up SimData objects that represent averages of other SimData objects, and it is these situations | where `mult' is handy. Assumes that `mult' is positive and non-zero. Assumes that `pattern_length' for this SimData | object is identical to the `pattern_length' of `other'. */ void SimData::addDataTo( SimData & other, /**< is the SimData object that will receive the data currently contained in this SimData object */ pattern_count_t mult) /**< is the factor multiplied by each pattern's count before pattern is stored in `other' */ { #if DISABLED_UNTIL_SIMULATION_WORKING_WITH_PARTITIONING PHYCAS_ASSERT(mult > 0.0); // If this object has no patterns, return immediately if (total_count == 0.0) return; // If other is empty, then it most likely needs to be initialized if (other.getTotalCount() == 0) { other.resetPatternLength(pattern_length); } PHYCAS_ASSERT(pattern_length == other.getPatternLength()); for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it) { pattern_count_t count = it->second; // GetCurrPattern returns a workspace for building up a pattern to be added to other int8_vect_t & other_pattern = other.getCurrPattern(); // Copy pattern represented by *it to other's workspace std::copy(it->first.begin(), it->first.end(), other_pattern.begin()); // Add the pattern in other's workspace to other's pattern map pattern_count_t mult_count = mult*count; other.insertPattern(mult_count); } #endif }
/*---------------------------------------------------------------------------------------------------------------------- | Sets the data members `num_patterns', `num_rates' and `num_states', which determine the dimensions of all | CondLikelihood objects stored. If the `cl_stack' is not currently empty and if the new conditional likelihood array | length is greater than the current length, all existing objects in `cl_stack' are deleted so that CLAs supplied to | the tree in the future will be at least the minimum length needed. Because it is critical that CLAs already checked | out to a tree be removed if the new length is longer than the old length (otherwise, CLAs that are too short will | be used in the future), this function also checks to make sure there are no CLAs currently checked out. If there are | checked out CLAs, an XLikelihood exception is thrown. This function has no effect unless the supplied arguments | imply that newly-created CLAs will be longer than the existing ones. It is somewhat wastefull to leave in CLAs that | are longer than they need to be, but perhaps more wasteful to continually recall and delete all existing CLAs just | to ensure that their length is exactly correct. 
*/ void CondLikelihoodStorage::setCondLikeDimensions(const uint_vect_t & np, const uint_vect_t & nr, const uint_vect_t & ns) { unsigned sz = (unsigned)np.size(); PHYCAS_ASSERT(nr.size() == sz); PHYCAS_ASSERT(ns.size() == sz); bool no_old = (num_patterns.size() == 0) || (num_rates.size() == 0) || (num_states.size() == 0); bool old = !no_old; unsigned newlen = 0; unsigned oldlen = 0; for (unsigned i = 0; i < sz; ++i) { PHYCAS_ASSERT(np[i] > 0); PHYCAS_ASSERT(nr[i] > 0); PHYCAS_ASSERT(ns[i] > 0); newlen += np[i]*nr[i]*ns[i]; if (old) oldlen += num_patterns[i]*num_rates[i]*num_states[i]; } if (newlen > oldlen) { clearStack(); num_patterns.resize(sz); num_rates.resize(sz); num_states.resize(sz); std::copy(np.begin(), np.end(), num_patterns.begin()); std::copy(nr.begin(), nr.end(), num_rates.begin()); std::copy(ns.begin(), ns.end(), num_states.begin()); } }
/*----------------------------------------------------------------------------------------------------------------------
| Randomly picks the node that will act as Z, the bottom of the three nodes involved in a Larget-Simon move. The
| supplied node `middle' is the node acting as Y. In the figure below, every node labeled Z is a candidate return
| value. Z is the owner of the lowermost edge involved in the move (X owns the uppermost edge, Y the middle edge).
|>
|      X  X  X
|       \ | /
|        \|/
|   Z  Z  Y
|    \ | /
|     \|/
|      Z
|      |
|>
| A draw of 0 selects the parent of `middle' itself (the lower Z in the figure); any other draw j selects the j-th
| child of that parent when `middle' is skipped in the count.
*/
TreeNode * LargetSimonMove::chooseZ(
  TreeNode * middle)    /**< is the middle node (Y) */
{
    TreeNode * U = middle->GetParent();
    PHYCAS_ASSERT(U != NULL);

    // presumably SampleUInt(n) draws from 0..n-1, giving one slot for U plus one per sibling of `middle' -- confirm
    const unsigned uchildren = U->CountChildren();
    const unsigned which_child = rng->SampleUInt(uchildren);

    // Slot 0 stands for the edge below U, which U itself owns
    if (which_child == 0)
        return U;

    // Otherwise walk U's children, numbering only those that are not `middle', until the drawn slot is reached
    unsigned seen = 0;
    TreeNode * nd = U->GetLeftChild();
    while (nd != NULL)
    {
        if (nd != middle)
        {
            ++seen;
            if (seen == which_child)
                break;
        }
        nd = nd->GetRightSib();
    }
    PHYCAS_ASSERT(nd != NULL);
    return nd;
}
/*-------------------------------------------------------------------------------------------------------------------------- | Called if the move is accepted. */ void LargetSimonMove::accept() { MCMCUpdater::accept(); if (star_tree_proposal) { TreeNode * nd = orig_node->IsTip() ? orig_node->GetParent() : orig_node; PHYCAS_ASSERT(nd->IsInternal()); if (!likelihood->getNoData()) { likelihood->useAsLikelihoodRoot(nd); likelihood->discardCacheAwayFromNode(*orig_node); likelihood->discardCacheBothEnds(orig_node); } orig_node->UnselectNode(); } else { PHYCAS_ASSERT(ndY->IsInternal()); if (!likelihood->getNoData()) { likelihood->useAsLikelihoodRoot(ndY); likelihood->discardCacheAwayFromNode(*ndY); likelihood->discardCacheBothEnds(ndY); } ndX->UnselectNode(); ndY->UnselectNode(); ndZ->UnselectNode(); } reset(); }
/*---------------------------------------------------------------------------------------------------------------------- | Copies the end states in the supplied vector `p' to the data member `end_states_vec'. This function sets 'is_valid' | to false because changing the end states invalidates any univents currently mapped. */ void Univents::setEndStates( const int8_t * p) /**< is the vector of end states to be copied to `end_states_vec' */ { PHYCAS_ASSERT(p); setValid(false); //@POL shouldn't we also set times_valid to false here? const unsigned n = (const unsigned)end_states_vec.size(); PHYCAS_ASSERT(n > 0); for (unsigned i = 0; i < n; ++i) end_states_vec[i] = p[i]; //@POL should assert that p is long enough for this }
/*----------------------------------------------------------------------------------------------------------------------
| Constructor calls the base class (MCMCUpdater) constructor, initializes the GTR pointer `gtr' to NULL and stores
| `w' in `which'. Sets `curr_value' to 1.0 and flags this updater as a slice-sampled parameter that is neither a
| move, a master parameter nor a hyperparameter. Assumes `w' is less than 6.
|
| Note: `curr_ln_prior' is not recomputed here; this constructor only assigns the members listed above.
*/
GTRRateParam::GTRRateParam(
  unsigned w)   /**< The 0-based index of the relative rate being managed by this object (0=AC, 1=AG, 2=AT, 3=CG, 4=CT and 5=GT) */
  : MCMCUpdater(), gtr(NULL), which(w)
{
    // `w' is unsigned, so only the upper bound needs checking
    PHYCAS_ASSERT(w < 6);

    curr_value          = 1.0;
    has_slice_sampler   = true;
    is_move             = false;
    is_master_param     = false;
    is_hyper_param      = false;
}
/*----------------------------------------------------------------------------------------------------------------------
| Returns the natural logarithm of the product of the main-diagonal elements of this SquareMatrix, computed as the
| sum of the logs of the individual elements. For a triangular matrix this is the log of the determinant. Assumes
| `dim' is positive and that every diagonal element is strictly positive.
*/
double SquareMatrix::LogProdMainDiag() const
{
    PHYCAS_ASSERT(dim > 0);

    double log_product = 0.0;
    unsigned k = 0;
    while (k < dim)
    {
        const double diag = GetElement(k, k);
        PHYCAS_ASSERT(diag > 0.0);
        log_product += log(diag);
        ++k;
    }
    return log_product;
}
/*---------------------------------------------------------------------------------------------------------------------- | Ensures that the `cl_stack' contains at least `capacity' CondLikelihoodShPtr objects. */ void CondLikelihoodStorage::fillTo(unsigned capacity) { PHYCAS_ASSERT(std::accumulate(num_patterns.begin(), num_patterns.end(), 0) > 0); PHYCAS_ASSERT(std::accumulate(num_rates.begin(), num_rates.end(), 0) > 0); PHYCAS_ASSERT(std::accumulate(num_states.begin(), num_states.end(), 0) > 0); unsigned curr_sz = (unsigned)cl_stack.size(); unsigned num_needed = (capacity > curr_sz ? capacity - curr_sz : 0); for (unsigned i = 0; i < num_needed; ++i) { cl_stack.push(CondLikelihoodShPtr(new CondLikelihood(num_patterns, num_rates, num_states))); num_created++; } }
/*----------------------------------------------------------------------------------------------------------------------
| Returns the number of bytes occupied by the values of one CLA: sizeof(LikeFltType) times the product of the number
| of patterns, rates and states, summed over all partition subsets. Each subset's byte count is narrowed to unsigned
| before being added to the total.
*/
unsigned CondLikelihoodStorage::bytesPerCLA() const
{
    const unsigned nsubsets = (unsigned)num_patterns.size();
    PHYCAS_ASSERT(num_rates.size() == nsubsets);
    PHYCAS_ASSERT(num_states.size() == nsubsets);

    unsigned nbytes = 0;
    for (unsigned k = 0; k < nsubsets; ++k)
    {
        const unsigned subset_bytes = (unsigned)(num_patterns[k]*num_rates[k]*num_states[k]*sizeof(LikeFltType));
        nbytes += subset_bytes;
    }
    return nbytes;
}
/*---------------------------------------------------------------------------------------------------------------------- | Adds data currently stored in `sim_pattern_map' to the patterns already in `other'. The value of `mult' is used to | modify the counts before they are added to `other'; that is, the count of each pattern added to `other' is the | original count multiplied by `mult'. Normally, `mult' would be specified to be 1.0, but in some cases it is | necessary to build up SimData objects that represent averages of other SimData objects, and it is these situations | where `mult' is handy. Assumes that `mult' is positive and non-zero. Assumes that `pattern_length' for this SimData | object is identical to the `pattern_length' of `other'. This function maintains a running average. It depends on | 'other' keeping track of how many times it has received new data (the 'num_additions' data member is used for this). | | Here is an example of how the running average works. Suppose there are only two possible patterns, and the following | pairs of pattern counts are generated by performing three posterior predictive simulations: |> | [x1, y1] are the counts for the two patterns in posterior predictive simulated dataset number 1 | [x2, y2] are the counts for the two patterns in posterior predictive simulated dataset number 2 | [x3, y3] are the counts for the two patterns in posterior predictive simulated dataset number 3 |> | In each case, the sum of x and y is n, so total_count is 3n after all three have been added. 
The strategy before | would be to add the x values and y values, then divide by 3 at the end, so the average dataset would be: |> | [(x1+x2+x3)/3, (y1+y2+y3)/3] |> | Waiting to do the division until the end can lead to overflow (see the first entry in the BUGS file for details), | however, so instead this function maintains a running average as follows: |> | After adding pair 1: p = 1/1, 1-p = 0 | x = x*(1-p) + x1*p = x1 | y = y*(1-p) + y1*p = y1 | total_count = total_count*(1-p) + (x1 + y1)*p = x1 + y1 | After adding pair 2: p = 1/2, 1-p = 1/2 | x = x*(1-p) + x2*p = (x1 + x2)/2 | y = y*(1-p) + y2*p = (y1 + y2)/2 | total_count = total_count*(1-p) + (x2 + y2)*p = (x1 + x2 + y1 + y2)/2 | After adding pair 3: p = 1/3, 1-p = 2/3 | x = x*(1-p) + x3*p = (x1 + x2 + x3)/3 | y = y*(1-p) + y3*p = (y1 + y2 + y3)/3 | total_count = total_count*(1-p) + (x3 + y3)*p = (x1 + x2 + x3 + y1 + y2 + y3)/3 |> */ void SimData::addToRunningAverage( SimData & other, PatternCountType mult) { PHYCAS_ASSERT(mult > 0.0); PHYCAS_ASSERT(total_count > 0.0); // If other is empty, then it most likely needs to be initialized if (other.getTotalCount() == 0) { other.resetPatternLength(pattern_length); } PHYCAS_ASSERT(pattern_length == other.getPatternLength()); // calculate p, the weight to be used for this addition unsigned nadd = other.getNumAdditions(); PatternCountType numer = (PatternCountType)1.0; PatternCountType denom = (PatternCountType)(1 + nadd); PatternCountType p = numer/denom; PatternCountType sum = 0.0; for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it) { PatternCountType count = it->second; // GetCurrPattern returns a workspace for building up a pattern to be added to other int8_vect_t & other_pattern = other.getCurrPattern(); // Copy pattern represented by *it to other's workspace std::copy(it->first.begin(), it->first.end(), other_pattern.begin()); // Add the pattern now in other's tmp_pattern workspace to other's pattern map PatternCountType 
mult_count = mult*count; sum += mult_count; other.insertPatternToRunningAverage(mult_count, p); } // Update total_count PatternCountType curr_total = other.getTotalCount(); PatternCountType one_minus_p = (PatternCountType)(1.0 - p); PatternCountType new_total = curr_total*one_minus_p + sum*p; other.setTotalCount(new_total); other.setNumAdditions(nadd + 1); if (nadd > 1) { std::exit(0); } }
/*---------------------------------------------------------------------------------------------------------------------- | Returns simulated data stored in `sim_pattern_map' as a string in the form of a two-column table. The first column | is labeled "Count" and the second is labeled "Pattern". The Count column shows the number of times its associated | pattern was inserted using the insertPattern function. The Pattern column shows a representation of the pattern | itself, using symbols for states provided in the `state_symbols' argument. The `state_symbols' argument should be | a vector of single-character strings supplying a symbol to represent each state that might show up in any pattern. | Assumes that no state in any pattern stored in `sim_pattern_map' is greater than or equal to the length of the | `state_symbols' vector (because states are used as indices into `state_symbols'). */ std::string SimData::patternTable( const StringVect & state_symbols) /**< is a vector of strings representing states (e.g. {"A", "C", "G", "T"}). Note that each state symbol should be a string of length 1 (i.e. a single character) */ { PHYCAS_ASSERT(state_symbols.size() > 0); outstr.clear(); if (sim_pattern_map.empty()) { outstr = "Sorry, no patterns are stored"; } else { outstr = " Count Pattern"; for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it) { // Output the count first outstr << str(boost::format("\n%10.1f") % it->second) << " "; // Now output the pattern std::transform(it->first.begin(), it->first.end(), std::back_inserter(outstr), LookupStateSymbol(state_symbols)); } } return outstr; }
/*---------------------------------------------------------------------------------------------------------------------- | Adds `tmp_pattern' to `sim_pattern_map' then passes `missing_state' to the wipePattern() function to fill | `tmp_pattern' with invalid values. | | Important! Unlike insertPattern, this function does not update total_count to reflect the new sum of pattern counts | over all patterns. The calling routine is responsible for updating total_count. */ void SimData::insertPatternToRunningAverage( pattern_count_t count, /**< is the number of times this pattern was seen */ pattern_count_t p) /**< is the inverse of the number of datasets added */ { // In debug build, check to make sure there are no elements in tmp_pattern that still equal // missing_state (if assert trips, it means that not all elements of tmp_pattern have been // replaced with actual states) PHYCAS_ASSERT(std::find(tmp_pattern.begin(), tmp_pattern.end(), missing_state) == tmp_pattern.end()); // insert the pattern stored in tmp_pattern into sim_pattern_map // Add tmp_pattern to sim_pattern_map if it has not yet been seen, otherwise increment the count // for this pattern if it is already in the map (see item 24, p. 
110, in Meyers' Efficient STL) pattern_map_t::iterator lowb = sim_pattern_map.lower_bound(tmp_pattern); if (lowb != sim_pattern_map.end() && !(sim_pattern_map.key_comp()(tmp_pattern, lowb->first))) { // Pattern is already in sim_pattern_map, so just modify its count pattern_count_t curr = lowb->second; pattern_count_t one_minus_p = (pattern_count_t)(1.0 - p); pattern_count_t new_value = curr*one_minus_p + count*p; lowb->second = new_value; std::ofstream f("smuteye2.txt", std::ios::out | std::ios::app); f << curr << '\t' << one_minus_p << '\t' << count << '\t' << p << '\t' << new_value << '\t' << "found" << std::endl; f.close(); } else { // tmp_pattern has not yet been stored in sim_pattern_map sim_pattern_map.insert(lowb, pattern_map_t::value_type(tmp_pattern, count*p)); std::ofstream f("smuteye2.txt", std::ios::out | std::ios::app); f << 0.0 << '\t' << (1-p) << '\t' << count << '\t' << p << '\t' << (count*p) << '\t' << "new" << std::endl; f.close(); } }
/*----------------------------------------------------------------------------------------------------------------------
| Copies every element of the `end_states_vec' vector, in order, into the supplied `tipSpecificStateCode' array. The
| caller must make sure the array has room for at least end_states_vec.size() elements; this is not checked here.
*/
void Univents::fillStateCodeArray(int8_t * tipSpecificStateCode) const
{
    const unsigned n = (unsigned)end_states_vec.size();
    PHYCAS_ASSERT(n > 0);

    int8_t * dest = tipSpecificStateCode;
    for (unsigned i = 0; i < n; ++i, ++dest)
        *dest = end_states_vec[i];
}
/*----------------------------------------------------------------------------------------------------------------------
| Returns a copy of the univent times stored for site `site'. Assumes `times_valid' is true; the lookup into the
| `times' vector uses the bounds-checked at(). Because the times are copied, this function is not particularly
| efficient. It is intended primarily for transferring univent times to Python code for debugging purposes.
*/
std::vector<double> Univents::getTimes(
  unsigned site) const  /**< is the site of interest */
{
    PHYCAS_ASSERT(times_valid);
    return std::vector<double>(times.at(site));
}
/*---------------------------------------------------------------------------------------------------------------------- | Computes the joint log working prior over all edges in the associated tree. */ double TreeScalerMove::recalcWorkingPriorForMove( bool using_tree_length_prior, /*< true if using the Ranalla-Yang tree length prior */ bool using_vartopol_prior) const /*< true if using the Holder et al. variable topology reference distribution */ { double ln_ref_dist = 0.0; if (using_tree_length_prior) { ln_ref_dist = likelihood->getTreeLengthRefDist()->GetLnPDF(tree); } else if (using_vartopol_prior) { // Computes the log of the probability of the tree under Mark Holder's variable tree topology reference distribution PHYCAS_ASSERT(topo_prob_calc); std::pair<double, double> treeprobs = topo_prob_calc->CalcTopologyLnProb(*tree, true); const double ln_ref_topo = treeprobs.first; const double ln_ref_edges = treeprobs.second; ln_ref_dist = ln_ref_topo + ln_ref_edges; } else { // Loop through all EdgeLenMasterParam objects and call the recalcWorkingPrior function of each. // Each EdgeLenMasterParam object knows how to compute the working prior for the edge lengths it controls. ChainManagerShPtr p = chain_mgr.lock(); const MCMCUpdaterVect & edge_length_params = p->getEdgeLenParams(); for (MCMCUpdaterVect::const_iterator it = edge_length_params.begin(); it != edge_length_params.end(); ++it) { if (!(*it)->isFixed()) ln_ref_dist += (*it)->recalcWorkingPrior(); } } return ln_ref_dist; }
/*----------------------------------------------------------------------------------------------------------------------
| Stores `state' in element `pos' of the `tmp_pattern' workspace. Assumes `pos' is less than `pattern_length'.
*/
void SimData::setState(
  unsigned pos,     /**< is the position in `tmp_pattern' to set */
  int8_t state)     /**< is the state to assign to the element in `tmp_pattern' at position pos */
{
    PHYCAS_ASSERT(pos < pattern_length);

    int8_t & slot = tmp_pattern[pos];
    slot = state;
}
/*---------------------------------------------------------------------------------------------------------------------- | Use samples in `fitting_sample' to parameterize a new BetaDistribution, which is then stored in `ref_dist'. | Assumes `fitting_sample' has more than 1 element. */ void MCMCUpdater::fitBetaWorkingPrior() { if (!isFixed()) { PHYCAS_ASSERT(fitting_sample.size() > 1); double n = (double)fitting_sample.size(); double sum = 0.0; double sum_of_squares = 0.0; for (double_vect_t::iterator i = fitting_sample.begin(); i != fitting_sample.end(); ++i) { double v = (*i); sum += v; sum_of_squares += v*v; } double mean = sum/n; double variance = (sum_of_squares - n*mean*mean)/(n - 1.0); // Let a, b be the parameters of a Beta(a,b) and let phi = a + b // Note that: // mean = a/phi // 1-mean = b/phi // variance = a*b/[phi^2*(phi + 1)] // Letting z = mean*(1-mean)/variance, // phi can be estimated as z - 1 // Now, a = mean*phi and b = (1-mean)*phi double phi = mean*(1.0-mean)/variance - 1.0; double a = phi*mean; double b = phi*(1.0 - mean); ref_dist = ProbDistShPtr(new BetaDistribution(a, b)); // std::cerr << boost::str(boost::format("@@@@@@@@@ working prior is Beta(%g, %g) for updater %s: mean = %g, variance = %g") % a % b % getName() % mean % variance) << std::endl; } }
unsigned CondLikelihood::calcCLALength( const uint_vect_t & npatterns, /**< is a vector containing the number of data patterns for each partition subset */ const uint_vect_t & nrates, /**< is a vector containing the number of among-site relative rate categories for each partition subset */ const uint_vect_t & nstates) /**< is a vector containing the number of states for each partition subset */ { unsigned sz = (unsigned)npatterns.size(); PHYCAS_ASSERT(nrates.size() == sz); PHYCAS_ASSERT(nstates.size() == sz); unsigned total = 0; for (unsigned i = 0; i < sz; ++i) { total += (npatterns[i]*nrates[i]*nstates[i]); } return total; }
/*----------------------------------------------------------------------------------------------------------------------
| Returns a pointer-to-const view of `parWorkingCLA'. Assumes (asserts in debug builds) that `parWorkingCLA'
| currently points to an object.
*/
inline ConstCondLikelihoodShPtr InternalData::getValidParentalCondLikePtr() const
{
    PHYCAS_ASSERT(parWorkingCLA);
    ConstCondLikelihoodShPtr cla(parWorkingCLA);
    return cla;
}
/*----------------------------------------------------------------------------------------------------------------------
| Computes the log of the probability of the tree under Mark Holder's tree topology reference distribution. The
| topology probability calculator returns a pair of log values (one for the topology, one for the edges); their sum
| is returned. Assumes `topo_prob_calc' has been set.
*/
double LargetSimonMove::recalcWorkingPrior() const
{
    PHYCAS_ASSERT(topo_prob_calc);

    // first = log reference probability of the topology, second = that of the edge lengths
    const std::pair<double, double> ln_probs = topo_prob_calc->CalcTopologyLnProb(*tree, true);
    return ln_probs.first + ln_probs.second;
}
/*---------------------------------------------------------------------------------------------------------------------- | Override of base class version adds the current GTR relative rate parameter value to the data already stored in | `fitting_sample'. */ void GTRRateParam::educateWorkingPrior() { if (!isFixed()) { PHYCAS_ASSERT(isPriorSteward()); // only prior stewards should be building working priors double rateparam = getCurrValueFromModel(); fitting_sample.push_back(rateparam); } }
/*---------------------------------------------------------------------------------------------------------------------- | Builds up the `binv' vector by classifying stored site patterns into the following categories (bins): |> | Count Bin description | ---------------------------------------- | n_0 all patterns containing only A | n_1 all patterns containing only C | n_2 all patterns containing only G | n_3 all patterns containing only T | n_4 patterns containing any 2 states | n_5 patterns containing any 3 states | n_6 patterns containing any 4 states | ---------------------------------------- |> | Warning: this function currently assumes no missing data! A missing state is treated as if it were one of the | other states in the pattern. For example, the count for the pattern 'AAACA?GAA' would be stuffed into the 3-state | bin, with the implicit assumption being that the ? equals either A, C or G. */ void SimData::buildBinVector( unsigned nstates) /**< is the number of states */ { // formerly DISABLED_UNTIL_SIMULATION_WORKING_WITH_PARTITIONING unsigned nbins = 2*nstates - 1; binv.clear(); binv.resize(nbins, 0.0); std::set<int8_t> state_set; // pattern_map_t associates int8_vect_t keys (pattern) with double values (count) for (pattern_map_t::iterator it = sim_pattern_map.begin(); it != sim_pattern_map.end(); ++it) { const int8_vect_t & p = it->first; // For this pattern, count distinct states by creating a set state_set.clear(); unsigned last_state = UINT_MAX; for (int8_vect_t::const_iterator pit = p.begin(); pit != p.end(); ++pit) { int8_t curr_state = *pit; int cs = (int)curr_state; if (cs >= 0 && cs < (int)nstates) { // state is not a gap (-1), missing (nstates), or an ambiguity code (> nstates), so add to set state_set.insert(curr_state); last_state = cs; } } unsigned sz = (unsigned)state_set.size(); PHYCAS_ASSERT(sz > 0); PHYCAS_ASSERT(sz <= nstates); double this_count = (double)(it->second); if (sz == 1) { // pattern had only one state, so add pattern count 
to appropriate constant site bin binv[last_state] += this_count; } else { // pattern had sz states, so add pattern count to appropriate variable site bin binv[nstates + sz - 2] += this_count; } } }
/*---------------------------------------------------------------------------------------------------------------------- | Calls GetLnPDF function of prior to recalculate `curr_ln_prior'. This function is important because the | tempting getLnPrior() member function only returns the value of `curr_ln_prior' (it does not recalculate anything). */ double MCMCUpdater::recalcPrior() { // Many moves will not have a prior assigned to them. In these cases, just return 0.0, // which will have no effect on the cumulative log-prior. if (!(prior || mv_prior)) { //std::cerr << boost::str(boost::format("~~~ updater named '%s' cannot compute prior") % name) << std::endl; return 0.0; } if (prior) { //std::cerr << "MCMCUpdater::recalcPrior: prior = " << prior->GetDistributionDescription() << std::endl; //@@@ double value = getCurrValueFromModel(); try { curr_ln_prior = prior->GetLnPDF(value); } catch(XProbDist &) { PHYCAS_ASSERT(0); } //std::cerr << boost::str(boost::format("~~~ updater named '%s' computed univariate prior for value %g to be %g") % name % value % curr_ln_prior) << std::endl; return curr_ln_prior; } else { double_vect_t values; getCurrValuesFromModel(values); try { curr_ln_prior = mv_prior->GetLnPDF(values); } catch(XProbDist &) { PHYCAS_ASSERT(0); } //std::cerr << boost::str(boost::format("~~~ updater named '%s' computed multivariate prior for vector ") % name);//temp //std::copy(values.begin(), values.end(), std::ostream_iterator<double>(std::cerr," "));//temp //std::cerr << boost::str(boost::format(" to be %g") % curr_ln_prior) << std::endl;//temp return curr_ln_prior; } }
/*----------------------------------------------------------------------------------------------------------------------
| Returns a pointer-to-const view of `childWorkingCLA'. Assumes (asserts in debug builds) that `childWorkingCLA'
| currently points to an object.
*/
inline ConstCondLikelihoodShPtr InternalData::getValidChildCondLikePtr() const
{
    PHYCAS_ASSERT(childWorkingCLA);
    ConstCondLikelihoodShPtr cla(childWorkingCLA);
    return cla;
}
/*---------------------------------------------------------------------------------------------------------------------- | Chooses a random edge and changes its current length m to a new length m* using the following formula, where `lambda' is | a tuning parameter. |> | m* = m*exp(lambda*(r.Uniform() - 0.5)) |> */ void LargetSimonMove::starTreeProposeNewState() { // Choose edge randomly. // unsigned numEdges = tree->GetNNodes() - 1; unsigned k = rng->SampleUInt(numEdges); unsigned i = 0; //@POL this loop is crying out for the for_each algorithm for (orig_node = tree->GetFirstPreorder(); orig_node != NULL; orig_node = orig_node->GetNextPreorder()) { // All nodes have an edge associated with them except for the root // if (!orig_node->IsTipRoot()) { if (i == k) { orig_edge_len = orig_node->GetEdgeLen(); break; } ++i; } } // Modify the edge // double m = orig_node->GetEdgeLen(); double mstar = m*std::exp(lambda*(rng->Uniform() - 0.5)); orig_node->SetEdgeLen(mstar); // Invalidate CLAs to ensure next likelihood calculation will be correct orig_node->SelectNode(); TreeNode * nd = orig_node->IsTip() ? orig_node->GetParent() : orig_node; PHYCAS_ASSERT(nd->IsInternal()); likelihood->useAsLikelihoodRoot(nd); likelihood->invalidateAwayFromNode(*orig_node); likelihood->invalidateBothEnds(orig_node); ChainManagerShPtr p = chain_mgr.lock(); PHYCAS_ASSERT(p); JointPriorManagerShPtr jpm = p->getJointPriorManager(); jpm->allEdgeLensModified(tree); //jpm->externalEdgeLensModified("external_edgelen", tree); }
/*----------------------------------------------------------------------------------------------------------------------
| Subtracts each element of `other' from the corresponding element of this matrix, in place. Assumes dimension > 0
| and that `m' exists. Both matrices are traversed as dim*dim consecutive doubles starting at element [0][0], and
| `other' is assumed to be at least as large as this matrix.
*/
void SquareMatrix::Subtract(
  const SquareMatrix & other)   /**< is the matrix to subtract from this */
{
    PHYCAS_ASSERT(dim > 0);

    const unsigned nelements = dim*dim;
    double * src = &other.m[0][0];
    double * dest = &m[0][0];
    for (unsigned k = 0; k < nelements; ++k)
        dest[k] -= src[k];
}
/*---------------------------------------------------------------------------------------------------------------------- | Assumes dimension > 0 and `m' exists. Sets each element of `m' to the supplied `scalar'. */ void SquareMatrix::SetToScalar( double scalar) /**< is the scalar value to which each element is set */ { //std::cerr << "----> SquareMatrix::SetToScalar " << id << ", scalar = " << scalar << " <----" << std::endl; PHYCAS_ASSERT(dim > 0); unsigned last = dim*dim; double * p = &m[0][0]; for (unsigned i = 0; i < last; ++i) *p++ = scalar; }
/*---------------------------------------------------------------------------------------------------------------------- | Assumes dimension > 0 and `m' exists. Multiplies each element of `m' by the supplied `scalar'. */ void SquareMatrix::ScalarMultiply( double scalar) /**< is the scalar value multiplied by each element */ { //std::cerr << "----> SquareMatrix::ScalarMultiply " << id << ", scalar = " << scalar << " <----" << std::endl; PHYCAS_ASSERT(dim > 0); unsigned last = dim*dim; double * p = &m[0][0]; for (unsigned i = 0; i < last; ++i) *p++ *= scalar; }
/*---------------------------------------------------------------------------------------------------------------------- | This creates a SliceSampler object and assigns it to the `slice_sampler' data member, which is a shared_ptr that | points to nothing initially. The SliceSampler constructor takes a boost::shared_ptr<Lot> (which we have available) | FuncToSampleWkPtr (this object). Thus, the SliceSampler cannot be created in the constructor because "this" does | not yet fully exist, hence the need for a createSliceSampler() member function, which needs to be called at some | point after the MCMCUpdater-derived object is created and of course before its `slice_sampler' data member starts | being used. */ void MCMCUpdater::createSliceSampler() { PHYCAS_ASSERT(!slice_sampler); // don't want to do this more than once #if defined(WEAK_FUNCTOSAMPLE) slice_sampler.reset(new SliceSampler(rng, FuncToSampleWkPtr(shared_from_this()))); // forces inclusion of "phycas/src/slice_sampler.hpp" #else slice_sampler.reset(new SliceSampler(rng, shared_from_this())); // forces inclusion of "phycas/src/slice_sampler.hpp" #endif slice_sampler->SetMaxUnits(slice_max_units); slice_sampler->SetXValue(curr_value); }