Exemplo n.º 1
0
/** Estimate the distance between two contigs and write the result.
 * Nothing is written when fewer than opt::npairs pairs are supplied.
 * When the estimate is supported by at least opt::npairs pairs it is
 * written to out, either as a DOT edge line or as a " name,estimate"
 * record; otherwise a warning is printed to cerr if opt::verbose > 1.
 * @param out the stream that receives the estimate
 * @param id0 the first contig
 * @param id1 the second contig
 * @param len0 passed through to estimateDistance
 * @param len1 passed through to estimateDistance
 * @param pairs the pairs joining the two contigs
 * @param pmf the distribution used by estimateDistance
 */
static void writeEstimate(ostream& out,
		const ContigNode& id0, const ContigNode& id1,
		unsigned len0, unsigned len1,
		const Pairs& pairs, const PMF& pmf)
{
	// Too few pairs to attempt an estimate at all.
	if (pairs.size() < opt::npairs)
		return;

	DistanceEst est;
	est.distance = estimateDistance(len0, len1,
			pairs, pmf, est.numPairs);
	est.stdDev = pmf.getSampleStdDev(est.numPairs);

	// The edge used for naming pairs id0 with id1 ^ id0.sense().
	// NOTE(review): presumably this orients id1 relative to id0;
	// confirm against ContigNode::operator^.
	std::pair<ContigNode, ContigNode> edge(id0, id1 ^ id0.sense());

	const bool enoughPairs = est.numPairs >= opt::npairs;
	if (!enoughPairs) {
		// The estimate is rejected; report it only when verbose.
		if (opt::verbose > 1) {
#pragma omp critical(cerr)
			cerr << "warning: " << get(g_contigNames, edge)
				<< " [d=" << est.distance << "] "
				<< est.numPairs << " of " << pairs.size()
				<< " pairs fit the expected distribution\n";
		}
		return;
	}

	if (opt::format == DOT) {
#pragma omp critical(out)
		out << get(g_contigNames, edge) << " [" << est << "]\n";
		return;
	}

	// NOTE(review): this write is not inside an OpenMP critical
	// section, unlike the DOT branch; presumably the caller
	// serializes access to out in this format -- confirm.
	out << ' ' << get(g_contigNames, id1) << ',' << est;
}
Exemplo n.º 2
0
// One ordered (i, j) term of the non-HW HMatrix.
//
// The value starts as p.val(i) * q.val(j) and then receives up to three
// background contributions, each added only when its weight is positive:
//   Bq * p.val(i) * hback(j, i)             when Bq > 0
//   Bp * q.val(j) * hback(i, j)             when Bp > 0
//   Bp * Bq * hback.pOrdered((i, j))        when Bp * Bq > 0
//
// Bp, Bq - background weights applied for p and q respectively
// p, q   - allele PMFs
// hback  - background frequencies
// i, j   - the ordered allele pair; swapping them gives a different term
inline double term_ij(
        double Bp,
        double Bq,
        const PMF<Allele> &p,
        const PMF<Allele> &q,
        const BackFreq &hback,
        Allele const &i,
        Allele const &j)
{
    // Term with no background involved.
    double total = p.val(i) * q.val(j);

    if (Bq > 0)
    {
        total += Bq * p.val(i) * hback(j, i);
    }

    if (Bp > 0)
    {
        total += Bp * q.val(j) * hback(i, j);
    }

    // Both alleles drawn from the background.
    if (Bp * Bq > 0)
    {
        total += Bp * Bq * hback.pOrdered(make_pair(i, j));
    }

    return total;
}
Exemplo n.º 3
0
/** Estimate the distance between two contigs using the difference of
 * the population mean and the sample mean.
 * @param numPairs [out] the number of pairs that agree with the
 * expected distribution
 * @return the estimated distance
 */
static int estimateDistanceUsingMean(
		const std::vector<int>& samples, const PMF& pmf,
		unsigned& numPairs)
{
	Histogram h(samples.begin(), samples.end());
	int d = (int)round(pmf.mean() - h.mean());

	// Count the number of samples that agree with the distribution.
	unsigned n = 0;
	for (Histogram::const_iterator it = h.begin();
			it != h.end(); ++it)
		if (pmf[it->first + d] > pmf.minProbability())
			n += it->second;

	numPairs = n;
	return d;
}
Exemplo n.º 4
0
// Build an HMatrix from two allele PMFs using the non-HW treatment of
// the 'F' term.
//
// p, q   - allele PMFs to combine
// hback  - background frequencies; its entries decide which (i, j) cells
//          are visited
// delta  - lower bound on the amount of background mixed in
// sparse - when true, a cell is stored only if its value is > 0
//
// Returns the assembled matrix after normalize() has been called on it.
HMatrix
makeHMatrixNHW(
        const PMF<Allele> &p,
        const PMF<Allele> &q,
        const BackFreq &hback,
        float delta,
        bool sparse)
{
    // Amount of background to add for each input: whichever is larger
    // of delta and the shortfall of that PMF's sum from 1.
    const double minBackground = static_cast<double>(delta);
    double Bp = std::max(minBackground, 1 - p.sum()); // background for p
    double Bq = std::max(minBackground, 1 - q.sum()); // background for q

    // Each ordered term is computed by term_ij, following
    //   H(ij) = p(i)q(j) + Bq p(i)b(j|i) + Bp q(j)b(i|j) + Bp Bq Bij

    HMatrix result;

    // Walk every element of the background. That yields the
    // upper-triangular cells only, so for an off-diagonal cell the
    // mirrored (j, i) term is added in as well.

    // NOTE(review): this C-style cast drops the const qualifier of hback
    // in order to reach the base-class member m_pmf; the loop only reads
    // through it.
    HMatrix &base = (HMatrix&)hback;
    for (PMF< std::pair<Allele, Allele> >::iterator cell = base.m_pmf.begin();
         cell != base.m_pmf.end();
         ++cell)
    {
        Allele i = cell->first.first;
        Allele j = cell->first.second;

        // upper-triangular term
        double value = term_ij(Bp, Bq, p, q, hback, i, j);

        if (i != j)
        {
            // lower-triangular term
            value += term_ij(Bp, Bq, p, q, hback, j, i);
        }

        const bool store = !sparse || value > 0;
        if (store)
        {
            result.set(i, j, value);
        }
    }

    result.normalize(); // just in case
    return result;
}
Exemplo n.º 5
0
// Check every allele of every PMF in this set against the population
// database given by `background`.
//
// A warning is emitted for each allele that background.find() does not
// locate; the scan continues so that all missing alleles are reported.
//
// Returns true when every allele was found, false otherwise.
bool
AlleleSet::checkBackground(PMF<Allele> const &background) const
{
    bool allFound = true;

    for (std::vector< PMF<Allele> >::const_iterator pmf = m_pmfs.begin();
         pmf != m_pmfs.end();
         ++pmf)
    {
        for (PMF<Allele>::const_iterator entry = pmf->begin();
             entry != pmf->end();
             ++entry)
        {
            const bool missing =
                background.find(entry->first) == background.end();
            if (!missing)
            {
                continue;
            }

            // Allele not in database.
            warn << startl << "allele not in population database: " << entry->first.string() << std::endl;
            allFound = false;
        }
    }

    return allFound;
}
Exemplo n.º 6
0
/** Compute the log likelihood that these samples came from the
 * specified distribution shifted by the parameter theta, and count
 * the samples that fit it.
 * @param theta the shift added to each sample value before it is
 * looked up in the PMF
 * @param samples the samples, as a histogram of value to count
 * @param pmf the probability mass function
 * @return a pair whose first member is the log likelihood (the sum of
 * count * log(probability) over all histogram entries) and whose
 * second member is the number of samples with probability greater
 * than pmf.minProbability()
 */
static pair<double, unsigned>
computeLikelihood(int theta, const Histogram& samples, const PMF& pmf)
{
	double logLikelihood = 0;
	unsigned fitting = 0;

	Histogram::const_iterator bin = samples.begin();
	while (bin != samples.end()) {
		const double prob = pmf[bin->first + theta];
		const unsigned count = bin->second;

		// Every sample contributes to the likelihood ...
		logLikelihood += count * log(prob);

		// ... but only those above the minimum probability are
		// counted as fitting the distribution.
		if (prob > pmf.minProbability())
			fitting += count;

		++bin;
	}

	return pair<double, unsigned>(logLikelihood, fitting);
}