/**
 * Evaluate how well the samples fit the specified distribution when
 * every sample value is shifted by the parameter theta.
 * @param theta the shift added to each sample value before it is
 * looked up in the PMF, f_theta(x)
 * @param samples the samples, iterated as (value, count) entries
 * @param pmf the probability mass function
 * @return a pair whose first element is the log likelihood, i.e. the
 * sum of count * log(probability) over all entries, and whose second
 * element is the number of samples whose probability is greater than
 * pmf.minProbability()
 */
static pair<double, unsigned> computeLikelihood(int theta,
		const Histogram& samples, const PMF& pmf)
{
	double logLikelihood = 0;
	unsigned numAgree = 0;
	Histogram::const_iterator entry = samples.begin();
	while (entry != samples.end()) {
		const double prob = pmf[entry->first + theta];
		const unsigned count = entry->second;
		// Only samples that are more probable than the floor of the
		// distribution are counted as agreeing with it.
		if (prob > pmf.minProbability())
			numAgree += count;
		// Every sample contributes to the likelihood, agreeing or not.
		logLikelihood += count * log(prob);
		++entry;
	}
	return make_pair(logLikelihood, numAgree);
}
/** Estimate the distance between two contigs using the difference of * the population mean and the sample mean. * @param numPairs [out] the number of pairs that agree with the * expected distribution * @return the estimated distance */ static int estimateDistanceUsingMean( const std::vector<int>& samples, const PMF& pmf, unsigned& numPairs) { Histogram h(samples.begin(), samples.end()); int d = (int)round(pmf.mean() - h.mean()); // Count the number of samples that agree with the distribution. unsigned n = 0; for (Histogram::const_iterator it = h.begin(); it != h.end(); ++it) if (pmf[it->first + d] > pmf.minProbability()) n += it->second; numPairs = n; return d; }