Esempio n. 1
0
File: em.c Progetto: xied75/samtools
/*
 * Estimate the four joint two-site frequencies for records b0 and b1 with an
 * EM iteration and return the correlation between the two sites.
 *
 * f[] is seeded from the per-site estimates e0 = est_freq(.., pdg of b0) and
 * e1 = est_freq(.., pdg of b1) as
 *     f[0] = (1-e0)*(1-e1)   f[1] = (1-e0)*e1
 *     f[2] = e0*(1-e1)       f[3] = e0*e1
 * and then refined by pair_freq_iter() until no element moves by EPS or more
 * between two rounds, or ITER_MAX rounds have been run.
 *
 * Returns sqrt(D^2 / (p0*p1*q0*q1)) with D = f[0]*f[3] - f[1]*f[2],
 * p0 = f[0]+f[1], p1 = f[0]+f[2], q = 1-p. Note this is the square root,
 * i.e. |r|, not r^2.
 *
 * Returns -1 when the two records have different sample counts, when either
 * record has fewer than two alleles, when get_pdg3() returns null for either
 * record, or when the final statistic is NaN. In the first three cases every
 * element of f[] is -1; in the NaN case f[] holds the EM estimates.
 */
double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4])
{
	int i, j, n_smpl;
	double *pdg[2], flast[4], r, f0[2];
	// Invalidate the output first so that every early failure return below
	// leaves f[] holding the same -1 sentinel.
	f[0] = f[1] = f[2] = f[3] = -1.;
	if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples
	if (b0->n_alleles < 2 || b1->n_alleles < 2) return -1; // one allele only
	n_smpl = b0->n_smpl;
	// The buffers returned by get_pdg3() are released with free() below.
	pdg[0] = get_pdg3(b0); pdg[1] = get_pdg3(b1);
	if (pdg[0] == 0 || pdg[1] == 0) {
		free(pdg[0]); free(pdg[1]); // free(NULL) is a no-op, so no guard is needed
		return -1;
	}
	// set the initial value: product of the two per-site estimates
	f0[0] = est_freq(n_smpl, pdg[0]);
	f0[1] = est_freq(n_smpl, pdg[1]);
	f[0] = (1 - f0[0]) * (1 - f0[1]); f[3] = f0[0] * f0[1];
	f[1] = (1 - f0[0]) * f0[1]; f[2] = f0[0] * (1 - f0[1]);
	// iteration: stop once the largest per-element change drops below EPS
	for (j = 0; j < ITER_MAX; ++j) {
		double eps = 0;
		memcpy(flast, f, sizeof flast);
		pair_freq_iter(n_smpl, pdg, f);
		for (i = 0; i < 4; ++i) {
			double x = fabs(f[i] - flast[i]);
			if (x > eps) eps = x;
		}
		if (eps < EPS) break;
	}
	// free
	free(pdg[0]); free(pdg[1]);
	{ // calculate the correlation from the estimated joint frequencies
		double p[2], q[2], D;
		p[0] = f[0] + f[1]; q[0] = 1 - p[0];
		p[1] = f[0] + f[2]; q[1] = 1 - p[1];
		D = f[0] * f[3] - f[1] * f[2];
		r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1]));
		// A 0/0 division or a negative radicand yields NaN; report it as -1.
		// isnan() is the standard C99 <math.h> macro (replaces the
		// MSVC-specific _isnan()).
		if (isnan(r)) r = -1.;
	}
	return r;
}
Esempio n. 2
0
File: em.c Progetto: Debian/bcftools
/*
 * Compute the EM-based estimates for one record and store them in x[].
 *
 * Output layout:
 *   x[0]:    ref frequency
 *   x[1..3]: alt-alt, alt-ref, ref-ref frequency
 *   x[4]:    HWE P-value
 *   x[5..6]: group1 freq, group2 freq
 *   x[7]:    1-degree P-value
 *   x[8]:    2-degree P-value
 * All ten slots are preset to -1 before anything else happens, so a slot that
 * was not requested or could not be computed stays negative, also on every
 * error return. x[9] is never assigned any other value here.
 *
 * n1 is the boundary index between the two sample groups: it is passed as
 * the end of the first range and the start of the second range to freqml()
 * and g3_iter(). An n1 outside [0, rec->n_sample] is treated as 0, and the
 * group statistics (x[5..8]) are only computed when 0 < n1 < rec->n_sample.
 *
 * flag selects what to compute, as tested in the body:
 *   any of bits 1..4 (0xf<<1) or bits 8..9 (3<<8): genotype frequencies + HWE
 *   any of bits 5..7 (7<<5):                       group frequencies
 *   bit 7 (1<<7):   1-degree P-value (also forces bits 5..7 on)
 *   bits 8..9:      2-degree P-value
 *
 * Returns 0 on success. Returns -1 when the record has fewer than two
 * alleles, when call->pdg is null, or when est_freq() returns a negative
 * value ("no data"); in that last case x[0] holds that negative value.
 */
int bcf_em1(call_t *call, const bcf1_t *rec, int n1, int flag, double x[10])
{
	double *pdg;
	int i, n;
	// Preset the whole output before any early return so callers never read
	// uninitialised slots, even when they ignore the return value.
	for (i = 0; i < 10; ++i) x[i] = -1.; // set to negative
	if (rec->n_allele < 2) return -1; // one allele only
	// initialization
	if (n1 < 0 || n1 > rec->n_sample) n1 = 0;
	if (flag & 1<<7) flag |= 7<<5; // compute group freq if LRT is required
	if (flag & 0xf<<1) flag |= 0xf<<1; // any of bits 1..4 switches all four on
	n = rec->n_sample;
	pdg = call->pdg;
	if (pdg == 0) return -1;
	{
		if ((x[0] = est_freq(n, pdg)) < 0.) return -1; // no data
		x[0] = freqml(x[0], 0, n, pdg);
	}
	if (flag & (0xf<<1|3<<8)) { // estimate the genotype frequency and test HWE
		// g aliases x[1..3] and is refined in place by g3_iter(); f3 keeps the
		// starting values derived from x[0] for the likelihood ratio below.
		double *g = x + 1, f3[3], r;
		f3[0] = g[0] = (1 - x[0]) * (1 - x[0]);
		f3[1] = g[1] = 2 * x[0] * (1 - x[0]);
		f3[2] = g[2] = x[0] * x[0];
		for (i = 0; i < ITER_MAX; ++i)
			if (g3_iter(g, pdg, 0, n) < EPS) break;
		// Hardy-Weinberg equilibrium (HWE): product over samples of the
		// likelihood under g divided by the likelihood under f3; pdg holds
		// three values per sample.
		for (i = 0, r = 1.; i < n; ++i) {
			double *p = pdg + i * 3;
			r *= (p[0] * g[0] + p[1] * g[1] + p[2] * g[2]) / (p[0] * f3[0] + p[1] * f3[1] + p[2] * f3[2]);
		}
		// NOTE(review): unlike the two tests below, log(r) is not clamped at 0
		// here before being passed to kf_gammaq() -- confirm this is intended.
		x[4] = kf_gammaq(.5, log(r));
	}
	if ((flag & 7<<5) && n1 > 0 && n1 < n) { // group frequency
		x[5] = freqml(x[0], 0, n1, pdg);
		x[6] = freqml(x[0], n1, n, pdg);
	}
	if ((flag & 1<<7) && n1 > 0 && n1 < n) { // 1-degree P-value
		// Row 0 is built from the overall frequency, rows 1 and 2 from the
		// two group frequencies.
		double f[3], f3[3][3], tmp;
		f[0] = x[0]; f[1] = x[5]; f[2] = x[6];
		for (i = 0; i < 3; ++i)
			f3[i][0] = (1-f[i])*(1-f[i]), f3[i][1] = 2*f[i]*(1-f[i]), f3[i][2] = f[i]*f[i];
		tmp = log(lk_ratio_test(n, n1, pdg, f3));
		if (tmp < 0) tmp = 0; // a negative log ratio is clamped to 0
		x[7] = kf_gammaq(.5, tmp);
	}
	if ((flag & 3<<8) && n1 > 0 && n1 < n) { // 2-degree P-value
		// All three rows start from x[1..3]; rows 1 and 2 are then refined on
		// the two sample ranges separated by n1.
		double g[3][3], tmp;
		for (i = 0; i < 3; ++i) memcpy(g[i], x + 1, 3 * sizeof(double));
		for (i = 0; i < ITER_MAX; ++i)
			if (g3_iter(g[1], pdg, 0, n1) < EPS) break;
		for (i = 0; i < ITER_MAX; ++i)
			if (g3_iter(g[2], pdg, n1, n) < EPS) break;
		tmp = log(lk_ratio_test(n, n1, pdg, g));
		if (tmp < 0) tmp = 0; // a negative log ratio is clamped to 0
		x[8] = kf_gammaq(1., tmp);
	}
	return 0;
}