double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]) { const bcf1_t *b[2]; int i, j, n_smpl; double *pdg[2], flast[4], r, f0[2]; // initialize others if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples n_smpl = b0->n_smpl; b[0] = b0; b[1] = b1; f[0] = f[1] = f[2] = f[3] = -1.; if (b[0]->n_alleles < 2 || b[1]->n_alleles < 2) return -1; // one allele only pdg[0] = get_pdg3(b0); pdg[1] = get_pdg3(b1); if (pdg[0] == 0 || pdg[1] == 0) { free(pdg[0]); free(pdg[1]); return -1; } // set the initial value f0[0] = est_freq(n_smpl, pdg[0]); f0[1] = est_freq(n_smpl, pdg[1]); f[0] = (1 - f0[0]) * (1 - f0[1]); f[3] = f0[0] * f0[1]; f[1] = (1 - f0[0]) * f0[1]; f[2] = f0[0] * (1 - f0[1]); // iteration for (j = 0; j < ITER_MAX; ++j) { double eps = 0; memcpy(flast, f, 4 * sizeof(double)); pair_freq_iter(n_smpl, pdg, f); for (i = 0; i < 4; ++i) { double x = fabs(f[i] - flast[i]); if (x > eps) eps = x; } if (eps < EPS) break; } // free free(pdg[0]); free(pdg[1]); { // calculate r^2 double p[2], q[2], D; p[0] = f[0] + f[1]; q[0] = 1 - p[0]; p[1] = f[0] + f[2]; q[1] = 1 - p[1]; D = f[0] * f[3] - f[1] * f[2]; r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1])); // printf("R(%lf,%lf,%lf,%lf)=%lf\n", f[0], f[1], f[2], f[3], r); if (_isnan(r)) r = -1.; } return r; }
// x[0]: ref frequency // x[1..3]: alt-alt, alt-ref, ref-ref frequenc // x[4]: HWE P-value // x[5..6]: group1 freq, group2 freq // x[7]: 1-degree P-value // x[8]: 2-degree P-value int bcf_em1(call_t *call, const bcf1_t *rec, int n1, int flag, double x[10]) { double *pdg; int i, n; //, n2; if (rec->n_allele < 2) return -1; // one allele only // initialization if (n1 < 0 || n1 > rec->n_sample) n1 = 0; if (flag & 1<<7) flag |= 7<<5; // compute group freq if LRT is required if (flag & 0xf<<1) flag |= 0xf<<1; n = rec->n_sample; //n2 = n - n1; pdg = call->pdg; if (pdg == 0) return -1; for (i = 0; i < 10; ++i) x[i] = -1.; // set to negative { if ((x[0] = est_freq(n, pdg)) < 0.) return -1; // no data x[0] = freqml(x[0], 0, n, pdg); } if (flag & (0xf<<1|3<<8)) { // estimate the genotype frequency and test HWE double *g = x + 1, f3[3], r; f3[0] = g[0] = (1 - x[0]) * (1 - x[0]); f3[1] = g[1] = 2 * x[0] * (1 - x[0]); f3[2] = g[2] = x[0] * x[0]; for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g, pdg, 0, n) < EPS) break; // Hardy-Weinberg equilibrium (HWE) for (i = 0, r = 1.; i < n; ++i) { double *p = pdg + i * 3; r *= (p[0] * g[0] + p[1] * g[1] + p[2] * g[2]) / (p[0] * f3[0] + p[1] * f3[1] + p[2] * f3[2]); } x[4] = kf_gammaq(.5, log(r)); } if ((flag & 7<<5) && n1 > 0 && n1 < n) { // group frequency x[5] = freqml(x[0], 0, n1, pdg); x[6] = freqml(x[0], n1, n, pdg); } if ((flag & 1<<7) && n1 > 0 && n1 < n) { // 1-degree P-value double f[3], f3[3][3], tmp; f[0] = x[0]; f[1] = x[5]; f[2] = x[6]; for (i = 0; i < 3; ++i) f3[i][0] = (1-f[i])*(1-f[i]), f3[i][1] = 2*f[i]*(1-f[i]), f3[i][2] = f[i]*f[i]; tmp = log(lk_ratio_test(n, n1, pdg, f3)); if (tmp < 0) tmp = 0; x[7] = kf_gammaq(.5, tmp); } if ((flag & 3<<8) && n1 > 0 && n1 < n) { // 2-degree P-value double g[3][3], tmp; for (i = 0; i < 3; ++i) memcpy(g[i], x + 1, 3 * sizeof(double)); for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g[1], pdg, 0, n1) < EPS) break; for (i = 0; i < ITER_MAX; ++i) if (g3_iter(g[2], pdg, n1, n) < EPS) break; tmp = log(lk_ratio_test(n, n1, pdg, g)); if (tmp < 0) tmp = 0; x[8] = kf_gammaq(1., tmp); } return 0; }