Exemplo n.º 1
0
/** Return the most likely distance between two contigs and the number
 * of pairs that support that estimate. */
static pair<int, unsigned>
maximumLikelihoodEstimate(int first, int last,
		const Histogram& samples,
		const PMF& pmf,
		unsigned len0, unsigned len1)
{
	int filterSize = 2 * (int)(0.05 * pmf.mean()) + 3; // want an odd filter size
	first = max(first, (int)pmf.minValue() - samples.maximum()) - filterSize/2;
	last = min(last, (int)pmf.maxValue() - samples.minimum()) + filterSize/2 + 1;

	/* When randomly selecting fragments that span a given point,
	 * longer fragments are more likely to be selected than
	 * shorter fragments.
	 */
	WindowFunction window(len0, len1);

	unsigned nsamples = samples.size();
	double bestLikelihood = -numeric_limits<double>::max();
	int bestTheta = first;
	unsigned bestn = 0;
	vector<double> le;
	vector<unsigned> le_n;
	vector<int> le_theta;
	for (int theta = first; theta <= last; theta++) {
		// Calculate the normalizing constant of the PMF, f_theta(x).
		double c = 0;
		for (int i = pmf.minValue(); i <= (int)pmf.maxValue(); ++i)
			c += pmf[i] * window(i - theta);

		double likelihood;
		unsigned n;
	   	tie(likelihood, n) = computeLikelihood(theta, samples, pmf);
		likelihood -= nsamples * log(c);
		le.push_back(likelihood);
		le_n.push_back(n);
		le_theta.push_back(theta);
	}

	HannWindow filter(filterSize);
	for (int i = filterSize / 2; i < (int)le.size()-(filterSize / 2); i++) {
		double likelihood = 0;
		for (int j = -filterSize / 2; j <= filterSize / 2; j++) {
			assert((unsigned)(i + j) < le.size() && i + j >= 0);
			likelihood += filter(j) * le[i + j];
		}

		if (le_n[i] > 0 && likelihood > bestLikelihood) {
			bestLikelihood = likelihood;
			bestTheta = le_theta[i];
			bestn = le_n[i];
		}
	}
	return make_pair(bestTheta, bestn);
}
Exemplo n.º 2
0
/** Estimate the distance between two contigs using the difference of
 * the population mean and the sample mean.
 * @param numPairs [out] the number of pairs that agree with the
 * expected distribution
 * @return the estimated distance
 */
static int estimateDistanceUsingMean(
		const std::vector<int>& samples, const PMF& pmf,
		unsigned& numPairs)
{
	Histogram h(samples.begin(), samples.end());
	int d = (int)round(pmf.mean() - h.mean());

	// Count the number of samples that agree with the distribution.
	unsigned n = 0;
	for (Histogram::const_iterator it = h.begin();
			it != h.end(); ++it)
		if (pmf[it->first + d] > pmf.minProbability())
			n += it->second;

	numPairs = n;
	return d;
}