double pf(double x, double df1, double df2, int lower_tail, int log_p) { #ifdef IEEE_754 if (ISNAN(x) || ISNAN(df1) || ISNAN(df2)) return x + df2 + df1; #endif if (df1 <= 0. || df2 <= 0.) ML_ERR_return_NAN; R_P_bounds_01(x, 0., ML_POSINF); /* move to pchisq for very large values - was 'df1 > 4e5' in 2.0.x, now only needed for df1 = Inf or df2 = Inf {since pbeta(0,*)=0} : */ if (df2 == ML_POSINF) { if (df1 == ML_POSINF) { if(x < 1.) return R_DT_0; if(x == 1.) return (log_p ? -M_LN2 : 0.5); if(x > 1.) return R_DT_1; } return pchisq(x * df1, df1, lower_tail, log_p); } if (df1 == ML_POSINF)/* was "fudge" 'df1 > 4e5' in 2.0.x */ return pchisq(df2 / x , df2, !lower_tail, log_p); /* Avoid squeezing pbeta's first parameter against 1 : */ if (df1 * x > df2) x = pbeta(df2 / (df2 + df1 * x), df2 / 2., df1 / 2., !lower_tail, log_p); else x = pbeta(df1 * x / (df2 + df1 * x), df1 / 2., df2 / 2., lower_tail, log_p); return ML_VALID(x) ? x : ML_NAN; }
double Qchisq(double p0, double k){ double a=0.0; double b=10000.0; int i; if(pchisq(b,k)>p0){return(b);} for(i=0; i<100; i++){ if(pchisq((a+b)/2.0,k)>p0){ a=(a+b)/2.0; }else{ b=(a+b)/2.0; } if(fabs(a-b)<1e-8){break;} } return (a+b)/2.0; }
int main () { // boost test 0 //int cs[3][3] = {{5,24,14}, {57,138,96}, {151,315,187}}; //int ct[3][3] = {{4,16,10}, {71,176,86}, {120,330,200}}; // boost test 1 int cs[3][3] = {{227,225,29}, {191,189,58}, {24,33,11}}; int ct[3][3] = {{229,219,67}, {195,199,26}, {33,42,3}}; // boost test 2 //int cs[3][3] = {{32,103,93}, {49,250,203}, {52,94,111}}; //int ct[3][3] = {{33,121,96}, {76,203,227}, {20,124,113}}; // biforce 1 //int cs[3][3] = {{2,10,12}, {31,130,87}, {75,295,345}}; //int ct[3][3] = {{3,14,3}, {16,90,143}, {84,353,307}}; // biforce 2 //int cs[3][3] = {{21,214,289}, {7,119,260}, {0,0,77}}; //int ct[3][3] = {{12,123,542}, {3,69,223}, {4,14,23}}; double ll, pval; // calculate log likelihood ll = pairwise_epi_test(cs, ct); // determine p-value pval = pchisq(ll, 4.0, 0, 0); printf("\nLog likelihood: %f; p-value: %g\n\n", ll, pval); }
/* parametric tests for discrete variables. */ static double ct_discrete(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0, llx = 0, lly = NLEVELS(yy), llz = 0; int *xptr = NULL, *yptr = INTEGER(yy), *zptr = NULL; double statistic = 0; SEXP xdata, config; DISCRETE_CACHE(); for (i = 0; i < ntests; i++) { DISCRETE_SWAP_X(); if (test == MI || test == MI_ADF || test == X2 || test == X2_ADF) { /* mutual information and Pearson's X^2 asymptotic tests. */ statistic = c_cchisqtest(xptr, llx, yptr, lly, zptr, llz, nobs, df, test); if ((test == MI) || (test == MI_ADF)) statistic = 2 * nobs * statistic; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_SH) { /* shrinkage mutual information test. */ statistic = 2 * nobs * c_shcmi(xptr, llx, yptr, lly, zptr, llz, nobs, df); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == JT) { /* Jonckheere-Terpstra test. */ statistic = c_cjt(xptr, llx, yptr, lly, zptr, llz, nobs); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ UNPROTECT(1); return statistic; }/*CT_DISCRETE*/
/* parametric tests for discrete variables. */ static double ut_discrete(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0, llx = 0, lly = NLEVELS(yy), *xptr = NULL, *yptr = INTEGER(yy); double statistic = 0; SEXP xdata; for (i = 0; i < ntests; i++) { DISCRETE_SWAP_X(); if (test == MI || test == MI_ADF || test == X2 || test == X2_ADF) { /* mutual information and Pearson's X^2 asymptotic tests. */ statistic = c_chisqtest(xptr, llx, yptr, lly, nobs, df, test); if ((test == MI) || (test == MI_ADF)) statistic = 2 * nobs * statistic; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_SH) { /* shrinkage mutual information test. */ statistic = 2 * nobs * c_shmi(xptr, llx, yptr, lly, nobs); *df = ((double)(llx - 1) * (double)(lly - 1)); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == JT) { /* Jonckheere-Terpstra test. */ statistic = c_jt(xptr, llx, yptr, lly, nobs); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ return statistic; }/*UT_DISCRETE*/
double pf(double x, double n1, double n2, int lower_tail, int log_p) { #ifdef IEEE_754 if (ISNAN(x) || ISNAN(n1) || ISNAN(n2)) return x + n2 + n1; #endif if (n1 <= 0. || n2 <= 0.) ML_ERR_return_NAN; if (x <= 0.) return R_DT_0; /* fudge the extreme DF cases -- pbeta doesn't do this well */ if (n2 > 4e5) return pchisq(x * n1, n1, lower_tail, log_p); if (n1 > 4e5) return pchisq(n2 / x , n2, !lower_tail, log_p); x = pbeta(n2 / (n2 + n1 * x), n2 / 2.0, n1 / 2.0, !lower_tail, log_p); return ML_VALID(x) ? x : numeric_limits<double>::quiet_NaN(); }
SEXP pvaluecombine( SEXP RpVec, SEXP Rmethod ) { int k = length(RpVec); const char * method = CHAR(STRING_ELT(Rmethod, 0)); SEXP Rcmbdpvalue = PROTECT(allocVector(REALSXP, 1)); memset(REAL(Rcmbdpvalue), 0.0, sizeof(double)); double * cmbdpvalue = REAL(Rcmbdpvalue); if (!strcmp(method, "fisher")) { for (int i=0; i<k; i++) { *cmbdpvalue += log(REAL(RpVec)[i]); } *cmbdpvalue = 1 - pchisq(-2 * *cmbdpvalue, 2*k, 1, 0); } else if (!strcmp(method, "normal") || !strcmp(method, "stouffer")) { for (int i=0; i<k; i++) { *cmbdpvalue += qnorm(REAL(RpVec)[i], 0.0, 1.0, 1, 0); } *cmbdpvalue = *cmbdpvalue / sqrt(k); *cmbdpvalue = pnorm(*cmbdpvalue, 0.0, 1.0, 1, 0); } else if (!strcmp(method, "min") || !strcmp(method, "tippett")) { *cmbdpvalue = REAL(RpVec)[0]; for (int i=1; i<k; i++) { *cmbdpvalue = fmin2(*cmbdpvalue, REAL(RpVec)[i]); } *cmbdpvalue = 1 - pow(1-*cmbdpvalue, k); } else if (!strcmp(method, "max")) { *cmbdpvalue = REAL(RpVec)[0]; for (int i=1; i<k; i++) { *cmbdpvalue = fmax2(*cmbdpvalue, REAL(RpVec)[i]); } *cmbdpvalue = pow(*cmbdpvalue, k); } else if (!strcmp(method, "sum")) { for (int i=0; i<k; i++) { *cmbdpvalue += REAL(RpVec)[i]; } if (k <= 30) { *cmbdpvalue = pConvolveUniform(*cmbdpvalue, (double)k); } else { *cmbdpvalue = pnorm(*cmbdpvalue, (double)k/2.0, sqrt((double)k/12.0), 1, 0); } } else { *cmbdpvalue = 3.1415926; } // return UNPROTECT(1); return(Rcmbdpvalue); }
void disco_chisq(int *freqs,int *n_genes,int *labels,int *n_labels,double *res) { int g = 0,i = 0, j = 0; int freq_length = 2 * *n_labels; double beta1[BETA1_SIZE] = {0,0,0,0}; double beta2[BETA2_SIZE] = {0,0,0}; double f1[] = {0,0,0,0}; double f2[] = {0,0,0}; double d1[] = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; double d2[] = { 0,0,0, 0,0,0, 0,0,0 }; for(g=0;g<*n_genes;g++) { int gene_offset = g * freq_length; int res_offset = 9 * g; int colsums_freqs[2] = {0,0}; int colsums_freqs_x_labels[2] = {0,0}; int colsums_freqs_x_labels_x_labels[2] = {0,0}; for(i=0;i<*n_labels;i++) { colsums_freqs[0] += *(freqs+i+gene_offset); colsums_freqs[1] += *(freqs+i+*n_labels+gene_offset); colsums_freqs_x_labels[0] += *(freqs+i+gene_offset) * *(labels+i); colsums_freqs_x_labels[1] += *(freqs+i+*n_labels+gene_offset) * *(labels+i); colsums_freqs_x_labels_x_labels[0] += *(freqs+i+gene_offset) * *(labels+i) * *(labels+i); colsums_freqs_x_labels_x_labels[1] += *(freqs+i+*n_labels+gene_offset) * *(labels+i) * *(labels+i); } for(i=0;i<BETA1_SIZE;i++) { beta1[i] = 0; *(f1+i) = 0; for(j=0;j<BETA1_SIZE;j++) { *(d1 + BETA1_SIZE*i + j) = 0; } } for(i=0;i<BETA2_SIZE;i++) { beta2[i] = 0; *(f2+i) = 0; for(j=0;j<BETA2_SIZE;j++) { *(d2 + BETA2_SIZE*i + j) = 0; } } derivatives1(f1,d1,labels,n_labels,beta1,colsums_freqs,colsums_freqs_x_labels,colsums_freqs_x_labels_x_labels); double f1b[] = {f1[0],f1[1],f1[2]}; newton1(f1,d1,labels,n_labels,beta1,colsums_freqs,colsums_freqs_x_labels,colsums_freqs_x_labels_x_labels); f2[0] = f1b[0]; newton2(f1b,f2,d2,labels,n_labels,beta2,colsums_freqs,colsums_freqs_x_labels,colsums_freqs_x_labels_x_labels); double log1[2] = {0,0}; double larp1[2] = {0,0}; for(i=0;i<2;i++) { double alpha[2]; double bes[2]; if (i==0) { alpha[0] = beta1[0]; alpha[1] = beta1[1]; bes[0] = beta1[2]; bes[1] = beta1[3]; } else if (i==1) { alpha[0] = beta2[0]; alpha[1] = beta2[0]; bes[0] = beta2[1]; bes[1] = beta2[2]; } double br[2] = {0,0}; for(j=0;j<*n_labels;j++) { br[0] += exp( (labels[j] * alpha[0]) + (pow(labels[j],2) * bes[0] ) ); br[1] += exp( (labels[j] * alpha[1]) + (pow(labels[j],2) * bes[1] ) ); } larp1[i] = ((double)colsums_freqs[0] * log(br[0])) + ((double)colsums_freqs[1] * log(br[1])); log1[i] = ( (colsums_freqs_x_labels[0] * alpha[0]) + (colsums_freqs_x_labels[1] * alpha[1]) ) + ( (colsums_freqs_x_labels_x_labels[0] * bes[0]) + (colsums_freqs_x_labels_x_labels[1] * bes[1]) ) - larp1[i]; } double chisq = -2 * (log1[1] - log1[0]); double pvalue = 1 - pchisq(chisq,1,1,0); //return beta1,beta2,chisq,pvalue *(res + res_offset) = pvalue; *(res + 1 + res_offset) = chisq; *(res + 2 + res_offset) = beta1[0]; *(res + 3 + res_offset) = beta1[1]; *(res + 4 + res_offset) = beta1[2]; *(res + 5 + res_offset) = beta1[3]; *(res + 6 + res_offset) = beta2[0]; *(res + 7 + res_offset) = beta2[1]; *(res + 8 + res_offset) = beta2[2]; } }
void artp3_chr(char **R_file_prefix, int *R_method, int *R_nperm, int *R_seed, int *R_nthread, int *R_nsnp, int *R_ngene, double *R_vU, double *R_score0, double *R_vV, int *R_vgene_idx, int *R_gene_start, int *R_gene_end, int *R_vgene_cutpoint, int *R_gene_cutpoint_start, int *R_gene_cutpoint_end, double *R_gene_pval, int *R_arr_rank, int *R_sel_id, int *R_marg_id){ int len_file_prefix = strlen(*R_file_prefix); char *file_prefix = new char[len_file_prefix + 1]; file_prefix[0] = '\0'; strcat(file_prefix, *R_file_prefix); int method = *R_method; assert(method == 3); if(method == 3){ ; } int nperm = *R_nperm; int seed = *R_seed; int nthread = *R_nthread; int nsnp = *R_nsnp; int ngene = *R_ngene; fvec score0; fvec sigma2; fmat U; fmat V; load_score0(R_score0, score0, nsnp); load_cov(R_vV, V, nsnp); load_sigma2(V, sigma2); load_U(R_vU, U, nsnp); imat gene_idx; // index of SNPs in a gene load_gene_idx(R_vgene_idx, R_gene_start, R_gene_end, gene_idx, ngene); imat cutpoint; load_gene_cutpoint(R_vgene_cutpoint, R_gene_cutpoint_start, R_gene_cutpoint_end, cutpoint, ngene); string fprefix (file_prefix); svec gene_out (ngene, fprefix); for(int g = 0; g < ngene; ++g){ ostringstream gid; gid << g; gene_out[g] = gene_out[g] + string("GID.") + gid.str() + string(".bin"); } // write obs statistics for all genes imat sel_id(ngene); ivec marg_id(ngene); for(int g = 0; g < ngene; ++g){ fstream gout(gene_out[g].c_str(), ios::out | ios::binary); if(!gout){ error("Fail to write observed statistics to file"); } int ns = gene_idx[g].size(); int ncp = cutpoint[g].size(); int max_cutpoint = cutpoint[g][ncp - 1]; fvec s (ns, .0f); VecStat vs (ns, STAT0); for(int j = 0; j < ns; ++j){ s[j] = score0[gene_idx[g][j]]; s[j] = pchisq(s[j] * s[j] / sigma2[gene_idx[g][j]], 1, false, true); vs[j].stat = -s[j]; vs[j].id = j; } sort(vs.begin(), vs.end(), descending); marg_id[g] = vs[0].id; for(int j = 0; j < ns; ++j){ sel_id[g].push_back(vs[j].id); } sort(s.begin(), s.end()); for(int j = 1; j <= max_cutpoint; ++j){ s[j] += s[j - 1]; } for(int k = 0; k < ncp; ++k){ float u = -s[cutpoint[g][k]]; gout.write((char*)(&u), sizeof(u)); } gout.close(); } int i_sel_id = -1; for(int g = 0; g < ngene; ++g){ R_marg_id[g] = gene_idx[g][marg_id[g]] + 1; for(int k = 0; k < sel_id[g].size(); ++k){ ++i_sel_id; R_sel_id[i_sel_id] = gene_idx[g][sel_id[g][k]] + 1; } int nn = gene_idx[g].size() - sel_id[g].size(); while(nn){ ++i_sel_id; R_sel_id[i_sel_id] = -1; --nn; } } int ngap = min(10000, nperm); int nblock = nperm / ngap; for(int b = 0; b < nblock; ++b){ fmat null(ngap, fvec (nsnp, .0f)); drand48_data buf; // compute null statistics #pragma omp parallel num_threads(nthread) private(buf) { srand48_r(seed + b * nthread + omp_get_thread_num(), &buf); #pragma omp for for(int i = 0; i < ngap; ++i){ fvec rn; rnorm(buf, nsnp, rn); for(int j = 0; j < nsnp; ++j){ null[i][j] = .0f; for(int k = 0; k < nsnp; ++k){ null[i][j] += rn[k] * U[k][j]; } null[i][j] = null[i][j] * null[i][j] / sigma2[j]; null[i][j] = pchisq(null[i][j], 1, false, true); } } } // write null statistics to local files (per gene) #pragma omp parallel num_threads(min(nthread, ngene)) { #pragma omp for for(int g = 0; g < ngene; ++g){ ofstream gout; gout.open(gene_out[g].c_str(), ios::out | ios::binary | ios::app); if(!gout){ error("Fail to write null statistics to file"); } int ns = gene_idx[g].size(); int ncp = cutpoint[g].size(); int max_cutpoint = cutpoint[g][ncp - 1]; for(int i = 0; i < ngap; ++i){ fvec s(ns, .0f); for(int j = 0; j < ns; ++j){ s[j] = null[i][gene_idx[g][j]]; } sort(s.begin(), s.end()); for(int j = 1; j <= max_cutpoint; ++j){ s[j] += s[j - 1]; } for(int k = 0; k < ncp; ++k){ float u = -s[cutpoint[g][k]]; gout.write((char*)(&u), sizeof(u)); } } gout.close(); } } //fmat().swap(null); } // read null statistics (per gene) int irk = -1; for(int g = 0; g < ngene; ++g){ int ncp = cutpoint[g].size(); vector<VecStat> stat(ncp, VecStat (nperm + 1, STAT0)); fstream gin(gene_out[g].c_str(), ios::in | ios::binary); for(int i = 0; i < nperm + 1; ++i){ for(int j = 0; j < ncp; ++j){ float s = .0f; gin.read((char*)(&s), sizeof(s)); stat[j][i].stat = s; stat[j][i].id = i; } } gin.close(); if(remove(gene_out[g].c_str())){ error("Cannot delete gene output file"); } imat arr_rank(ncp, ivec (nperm + 1, 0)); #pragma omp parallel num_threads(min(ncp, nthread)) { #pragma omp for for(int j = 0; j < ncp; ++j){ sort(stat[j].begin(), stat[j].end(), descending); for(int i = 0; i < nperm + 1; ++i){ int id = stat[j][i].id; arr_rank[j][id] = i; } } } vector<VecStat>().swap(stat); ivec gene_min_p (nperm + 1, -1); ivec subsum(nthread, 0); ivec subtie(nthread, 0); int m = nperm + 1; for(int j = 0; j < ncp; ++j){ ++irk; R_arr_rank[irk] = arr_rank[j][0]; if(arr_rank[j][0] < m){ m = arr_rank[j][0]; } } gene_min_p[0] = m; #pragma omp parallel num_threads(nthread) { #pragma omp for for(int i = 1; i < nperm + 1; ++i){ int tid = omp_get_thread_num(); int m = nperm + 1; for(int j = 0; j < ncp; ++j){ if(arr_rank[j][i] < m){ m = arr_rank[j][i]; } } gene_min_p[i] = m; if(gene_min_p[i] < gene_min_p[0]){ subsum[tid] += 1; }else if(gene_min_p[i] == gene_min_p[0]){ subtie[tid] += 1; }else{ ; } } } R_gene_pval[g] = 1.0; int rep = 0; for(int t = 0; t < nthread; ++t){ R_gene_pval[g] += subsum[t]; rep += subtie[t]; } R_gene_pval[g] += rep / 2.0; R_gene_pval[g] /= nperm + 1; fstream gout(gene_out[g].c_str(), ios::out | ios::binary); if(!gout){ error("Fail to write gene statistics to file"); } for(int i = 0; i < nperm + 1; ++i){ gout.write((char*)(&(gene_min_p[i])), sizeof(gene_min_p[i])); } gout.close(); } delete[] file_prefix; }
double attribute_hidden pnchisq_raw(double x, double f, double theta, double errmax, double reltol, int itrmax, Rboolean lower_tail) { double lam, x2, f2, term, bound, f_x_2n, f_2n; double l_lam = -1., l_x = -1.; /* initialized for -Wall */ int n; Rboolean lamSml, tSml, is_r, is_b, is_it; LDOUBLE ans, u, v, t, lt, lu =-1; static const double _dbl_min_exp = M_LN2 * DBL_MIN_EXP; /*= -708.3964 for IEEE double precision */ if (x <= 0.) { if(x == 0. && f == 0.) return lower_tail ? exp(-0.5*theta) : -expm1(-0.5*theta); /* x < 0 or {x==0, f > 0} */ return lower_tail ? 0. : 1.; } if(!R_FINITE(x)) return lower_tail ? 1. : 0.; /* This is principally for use from qnchisq */ #ifndef MATHLIB_STANDALONE R_CheckUserInterrupt(); #endif if(theta < 80) { /* use 110 for Inf, as ppois(110, 80/2, lower.tail=FALSE) is 2e-20 */ LDOUBLE sum = 0, sum2 = 0, lambda = 0.5*theta, pr = EXP(-lambda); // does this need a feature test? double ans; int i; /* we need to renormalize here: the result could be very close to 1 */ for(i = 0; i < 110; pr *= lambda/++i) { sum2 += pr; sum += pr * pchisq(x, f+2*i, lower_tail, FALSE); if (sum2 >= 1-1e-15) break; } ans = (double) (sum/sum2); return ans; } #ifdef DEBUG_pnch REprintf("pnchisq(x=%g, f=%g, theta=%g): ",x,f,theta); #endif lam = .5 * theta; lamSml = (-lam < _dbl_min_exp); if(lamSml) { /* MATHLIB_ERROR( "non centrality parameter (= %g) too large for current algorithm", theta) */ u = 0; lu = -lam;/* == ln(u) */ l_lam = log(lam); } else { u = exp(-lam); } /* evaluate the first term */ v = u; x2 = .5 * x; f2 = .5 * f; f_x_2n = f - x; #ifdef DEBUG_pnch REprintf("-- v=exp(-th/2)=%g, x/2= %g, f/2= %g\n",v,x2,f2); #endif if(f2 * DBL_EPSILON > 0.125 && /* very large f and x ~= f: probably needs */ FABS(t = x2 - f2) < /* another algorithm anyway */ sqrt(DBL_EPSILON) * f2) { /* evade cancellation error */ /* t = exp((1 - t)*(2 - t/(f2 + 1))) / sqrt(2*M_PI*(f2 + 1));*/ lt = (1 - t)*(2 - t/(f2 + 1)) - 0.5 * log(2*M_PI*(f2 + 1)); #ifdef DEBUG_pnch REprintf(" (case I) ==> "); #endif } else { /* Usual case 2: careful not to overflow .. : */ lt = f2*log(x2) -x2 - lgammafn(f2 + 1); } #ifdef DEBUG_pnch REprintf(" lt= %g", lt); #endif tSml = (lt < _dbl_min_exp); if(tSml) { if (x > f + theta + 5* sqrt( 2*(f + 2*theta))) { /* x > E[X] + 5* sigma(X) */ return lower_tail ? 1. : 0.; /* FIXME: We could be more accurate than 0. */ } /* else */ l_x = log(x); ans = term = 0.; t = 0; } else { t = EXP(lt); #ifdef DEBUG_pnch REprintf(", t=exp(lt)= %g\n", t); #endif ans = term = (double) (v * t); } for (n = 1, f_2n = f + 2., f_x_2n += 2.; ; n++, f_2n += 2, f_x_2n += 2) { #ifdef DEBUG_pnch REprintf("\n _OL_: n=%d",n); #endif #ifndef MATHLIB_STANDALONE if(n % 1000) R_CheckUserInterrupt(); #endif /* f_2n === f + 2*n * f_x_2n === f - x + 2*n > 0 <==> (f+2n) > x */ if (f_x_2n > 0) { /* find the error bound and check for convergence */ bound = (double) (t * x / f_x_2n); #ifdef DEBUG_pnch REprintf("\n L10: n=%d; term= %g; bound= %g",n,term,bound); #endif is_r = is_it = FALSE; /* convergence only if BOTH absolute and relative error < 'bnd' */ if (((is_b = (bound <= errmax)) && (is_r = (term <= reltol * ans))) || (is_it = (n > itrmax))) { #ifdef DEBUG_pnch REprintf("BREAK n=%d %s; bound= %g %s, rel.err= %g %s\n", n, (is_it ? "> itrmax" : ""), bound, (is_b ? "<= errmax" : ""), term/ans, (is_r ? "<= reltol" : "")); #endif break; /* out completely */ } } /* evaluate the next term of the */ /* expansion and then the partial sum */ if(lamSml) { lu += l_lam - log(n); /* u = u* lam / n */ if(lu >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch REprintf(" n=%d; nomore underflow in u = exp(lu) ==> change\n", n); #endif v = u = EXP(lu); /* the first non-0 'u' */ lamSml = FALSE; } } else { u *= lam / n; v += u; } if(tSml) { lt += l_x - log(f_2n);/* t <- t * (x / f2n) */ if(lt >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch REprintf(" n=%d; nomore underflow in t = exp(lt) ==> change\n", n); #endif t = EXP(lt); /* the first non-0 't' */ tSml = FALSE; } } else { t *= x / f_2n; } if(!lamSml && !tSml) { term = (double) (v * t); ans += term; } } /* for(n ...) */ if (is_it) { MATHLIB_WARNING2(_("pnchisq(x=%g, ..): not converged in %d iter."), x, itrmax); } #ifdef DEBUG_pnch REprintf("\n == L_End: n=%d; term= %g; bound=%g\n",n,term,bound); #endif return (double) (lower_tail ? ans : 1 - ans); }
/* parametric tests for Gaussian variables. */ static double ct_gaustests(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0, nsx = length(zz), ncols = nsx + 2; double transform = 0, **column = NULL, *mean = NULL, statistic = 0, lambda = 0; double *u = NULL, *d = NULL, *vt = NULL, *cov = NULL, *basecov = 0; /* compute the degrees of freedom for correlation and mutual information. */ if (test == COR) *df = nobs - ncols; else if ((test == MI_G) || (test == MI_G_SH)) *df = 1; if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - ncols < 2))) { /* if there are not enough degrees of freedom, return independence. */ warning("trying to do a conditional independence test with zero degrees of freedom."); *df = 0; statistic = 0; for (i = 0; i < ntests; i++) pvalue[i] = 1; return statistic; }/*THEN*/ GAUSSIAN_CACHE(); if (ntests > 1) { /* allocate and compute mean values and the covariance matrix. */ mean = Calloc1D(ncols, sizeof(double)); c_meanvec(column, mean, nobs, ncols, 1); c_covmat(column, mean, ncols, nobs, cov, 1); memcpy(basecov, cov, ncols * ncols * sizeof(double)); for (i = 0; i < ntests; i++) { GAUSSIAN_PCOR_CACHE(); if (test == COR) { COMPUTE_PCOR(); transform = cor_t_trans(statistic, *df); pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G) { COMPUTE_PCOR(); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G_SH) { lambda = covmat_lambda(column, mean, cov, nobs, ncols); covmat_shrink(cov, ncols, lambda); COMPUTE_PCOR(); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == ZF) { COMPUTE_PCOR(); statistic = cor_zf_trans(statistic, (double)nobs - ncols); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ }/*THEN*/ else { GAUSSIAN_PCOR_NOCACHE(); if (test == COR) { COMPUTE_PCOR(); transform = cor_t_trans(statistic, *df); pvalue[0] = 2 * pt(fabs(transform), *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G) { COMPUTE_PCOR(); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[0] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G_SH) { lambda = covmat_lambda(column, mean, cov, nobs, ncols); covmat_shrink(cov, ncols, lambda); COMPUTE_PCOR(); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[0] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == ZF) { COMPUTE_PCOR(); statistic = cor_zf_trans(statistic, (double)nobs - ncols); pvalue[0] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*ELSE*/ GAUSSIAN_FREE(); Free1D(mean); Free1D(column); return statistic; }/*CT_GAUSTESTS*/
/* parametric tests for Gaussian variables. */ static double ut_gaustests(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0; double transform = 0, *xptr = NULL, *yptr = REAL(yy); double xm = 0, ym = 0, xsd = 0, ysd = 0, statistic = 0; /* compute the degrees of freedom for correlation and mutual information. */ if (test == COR) *df = nobs - 2; else if ((test == MI_G) || (test == MI_G_SH)) *df = 1; if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - 2 < 2))) { /* if there are not enough degrees of freedom, return independence. */ warning("trying to do an independence test with zero degrees of freedom."); *df = 0; statistic = 0; for (i = 0; i < ntests; i++) pvalue[i] = 1; return statistic; }/*THEN*/ /* cache mean and variance. */ ym = c_mean(yptr, nobs); ysd = c_sse(yptr, ym, nobs); for (i = 0; i < ntests; i++) { GAUSSIAN_SWAP_X(); statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd); if (test == COR) { transform = cor_t_trans(statistic, *df); pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G) { statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G_SH) { statistic *= 1 - cor_lambda(xptr, yptr, nobs, xm, ym, xsd, ysd, statistic); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == ZF) { statistic = cor_zf_trans(statistic, (double)nobs - 2); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ return statistic; }/*UT_GAUSTESTS*/
double test ( int ** hapallele, int snp, double * pheno, int numind, int ** cladeslist, int * cladesusage, int * numperclade, int numclades, int * cladeacc, double ** score, double * sumranks, double tie, double ** stat ) { double pvalue, maxstat, temppv; double kwstat; double rawkw; double kw; int clade; int i; int df=1; int sigclade; pvalue=1; do { sigclade=-9; maxstat=1; for ( clade=0; clade<numclades; clade++ ) { if ( cladesusage[clade]==0 ) { // test ith clade with previously found clades and its complement set kwstat = kw_quant ( cladeslist, cladesusage, numperclade, cladeacc, numclades,pheno, df, clade, numind, score, sumranks, tie ); //kwstat = chisq ( cladeslist, cladesusage, numperclade, cladeacc, numclades,pheno, df, clade, numind, score, sumranks, tie ); temppv = 1-pchisq(kwstat, df ); if ( temppv<maxstat ) { rawkw=kwstat; maxstat=temppv; sigclade=clade; } } } if ( maxstat<pvalue ) { pvalue=maxstat; kw=rawkw; } else { break; } if ( sigclade!=-9 ) { cladesusage[sigclade]=df; for ( i=0; i<numperclade[sigclade]; i++ ) { cladeacc[cladeslist[sigclade][i]]=df; } claderemove( cladesusage, cladeslist, numperclade, numclades, df, cladeacc ); df++; } i=i; }while( cladecount( numclades, cladesusage )!=0 ); for ( i=0; i<numind; i++ ) { if ( cladeacc[i]==0 ) { cladeacc[i]=df; } hapallele[i][snp]=cladeacc[i]; } stat[snp][0]=kw; stat[snp][1]=df-1; return pvalue; }
/* conditional linear Gaussian mutual information test. */ static double ut_micg(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df) { int i = 0, xtype = 0, ytype = TYPEOF(yy), llx = 0, lly = 0; double xm = 0, xsd = 0, ym = 0, ysd = 0, statistic = 0; void *xptr = NULL, *yptr = NULL; SEXP xdata; if (ytype == INTSXP) { /* cache the number of levels. */ lly = NLEVELS(yy); yptr = INTEGER(yy); }/*THEN*/ else { /* cache mean and variance. */ yptr = REAL(yy); ym = c_mean(yptr, nobs); ysd = c_sse(yptr, ym, nobs); }/*ELSE*/ for (i = 0; i < ntests; i++) { xdata = VECTOR_ELT(xx, i); xtype = TYPEOF(xdata); if ((ytype == INTSXP) && (xtype == INTSXP)) { /* if both nodes are discrete, the test reverts back to a discrete * mutual information test. */ xptr = INTEGER(xdata); llx = NLEVELS(xdata); DISCRETE_SWAP_X(); statistic = 2 * nobs * c_chisqtest(xptr, llx, yptr, lly, nobs, df, MI); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if ((ytype == REALSXP) && (xtype == REALSXP)) { /* if both nodes are continuous, the test reverts back to a Gaussian * mutual information test. */ xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = c_sse(xptr, xm, nobs); statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd); *df = 1; statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { if (xtype == INTSXP) { xptr = INTEGER(xdata); llx = NLEVELS(xdata); ysd = sqrt(ysd / (nobs - 1)); statistic = 2 * nobs * c_micg(yptr, ym, ysd, xptr, llx, nobs); *df = llx - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = sqrt(c_sse(xptr, xm, nobs) / (nobs - 1)); statistic = 2 * nobs * c_micg(xptr, xm, xsd, yptr, lly, nobs); *df = lly - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*ELSE*/ }/*THEN*/ }/*FOR*/ return statistic; }/*UT_MICG*/
/* conditional linear Gaussian mutual information test. */ static double ct_micg(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests, double *pvalue, double *df) { int xtype = 0, ytype = TYPEOF(yy), *nlvls = NULL, llx = 0, lly = 0, llz = 0; int ndp = 0, ngp = 0, nsx = length(zz), **dp = NULL, *dlvls = NULL, j = 0, k = 0; int i = 0, *zptr = 0; void *xptr = NULL, *yptr = NULL, **columns = NULL; double **gp = NULL; double statistic = 0; SEXP xdata; if (ytype == INTSXP) { /* cache the number of levels. */ lly = NLEVELS(yy); yptr = INTEGER(yy); }/*THEN*/ else { yptr = REAL(yy); }/*ELSE*/ /* extract the conditioning variables and cache their types. */ columns = Calloc1D(nsx, sizeof(void *)); nlvls = Calloc1D(nsx, sizeof(int)); df2micg(zz, columns, nlvls, &ndp, &ngp); dp = Calloc1D(ndp + 1, sizeof(int *)); gp = Calloc1D(ngp + 1, sizeof(double *)); dlvls = Calloc1D(ndp + 1, sizeof(int)); for (i = 0, j = 0, k = 0; i < nsx; i++) if (nlvls[i] > 0) { dlvls[1 + j] = nlvls[i]; dp[1 + j++] = columns[i]; }/*THEN*/ else { gp[1 + k++] = columns[i]; }/*ELSE*/ /* allocate vector for the configurations of the discrete parents; or, if * there no discrete parents, for the means of the continuous parents. */ if (ndp > 0) { zptr = Calloc1D(nobs, sizeof(int)); c_fast_config(dp + 1, nobs, ndp, dlvls + 1, zptr, &llz, 1); }/*THEN*/ for (i = 0; i < ntests; i++) { xdata = VECTOR_ELT(xx, i); xtype = TYPEOF(xdata); if (xtype == INTSXP) { xptr = INTEGER(xdata); llx = NLEVELS(xdata); }/*THEN*/ else { xptr = REAL(xdata); }/*ELSE*/ if ((ytype == INTSXP) && (xtype == INTSXP)) { if (ngp > 0) { /* need to reverse conditioning to actually compute the test. */ statistic = 2 * nobs * nobs * c_cmicg_unroll(xptr, llx, yptr, lly, zptr, llz, gp + 1, ngp, df, nobs); }/*THEN*/ else { /* the test reverts back to a discrete mutual information test. */ statistic = 2 * nobs * c_cchisqtest(xptr, llx, yptr, lly, zptr, llz, nobs, df, MI); }/*ELSE*/ }/*THEN*/ else if ((ytype == REALSXP) && (xtype == REALSXP)) { gp[0] = xptr; statistic = 2 * nobs * c_cmicg(yptr, gp, ngp + 1, NULL, 0, zptr, llz, dlvls, nobs); /* one regression coefficient for each conditioning level is added; * if all conditioning variables are continuous that's just one global * regression coefficient. */ *df = (llz == 0) ? 1 : llz; }/*THEN*/ else if ((ytype == INTSXP) && (xtype == REALSXP)) { dp[0] = yptr; dlvls[0] = lly; statistic = 2 * nobs * c_cmicg(xptr, gp + 1, ngp, dp, ndp + 1, zptr, llz, dlvls, nobs); /* for each additional configuration of the discrete conditioning * variables plus the discrete yptr, one whole set of regression * coefficients (plus the intercept) is added. */ *df = (lly - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1); }/*THEN*/ else if ((ytype == REALSXP) && (xtype == INTSXP)) { dp[0] = xptr; dlvls[0] = llx; statistic = 2 * nobs * c_cmicg(yptr, gp + 1, ngp, dp, ndp + 1, zptr, llz, dlvls, nobs); /* same as above, with xptr and yptr swapped. */ *df = (llx - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1); }/*ELSE*/ pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*FOR*/ Free1D(columns); Free1D(nlvls); Free1D(dlvls); Free1D(zptr); Free1D(dp); Free1D(gp); return statistic; }/*CT_MICG*/
double attribute_hidden pnchisq_raw(double x, double f, double theta /* = ncp */, double errmax, double reltol, int itrmax, Rboolean lower_tail, Rboolean log_p) { double lam, x2, f2, term, bound, f_x_2n, f_2n; double l_lam = -1., l_x = -1.; /* initialized for -Wall */ int n; Rboolean lamSml, tSml, is_r, is_b, is_it; LDOUBLE ans, u, v, t, lt, lu =-1; if (x <= 0.) { if(x == 0. && f == 0.) { #define _L (-0.5 * theta) // = -lambda return lower_tail ? R_D_exp(_L) : (log_p ? R_Log1_Exp(_L) : -expm1(_L)); } /* x < 0 or {x==0, f > 0} */ return R_DT_0; } if(!R_FINITE(x)) return R_DT_1; /* This is principally for use from qnchisq */ #ifndef MATHLIB_STANDALONE R_CheckUserInterrupt(); #endif if(theta < 80) { /* use 110 for Inf, as ppois(110, 80/2, lower.tail=FALSE) is 2e-20 */ LDOUBLE ans; int i; // Have pgamma(x,s) < x^s / Gamma(s+1) (< and ~= for small x) // ==> pchisq(x, f) = pgamma(x, f/2, 2) = pgamma(x/2, f/2) // < (x/2)^(f/2) / Gamma(f/2+1) < eps // <==> f/2 * log(x/2) - log(Gamma(f/2+1)) < log(eps) ( ~= -708.3964 ) // <==> log(x/2) < 2/f*(log(Gamma(f/2+1)) + log(eps)) // <==> log(x) < log(2) + 2/f*(log(Gamma(f/2+1)) + log(eps)) if(lower_tail && f > 0. && log(x) < M_LN2 + 2/f*(lgamma(f/2. + 1) + _dbl_min_exp)) { // all pchisq(x, f+2*i, lower_tail, FALSE), i=0,...,110 would underflow to 0. // ==> work in log scale double lambda = 0.5 * theta; double sum, sum2, pr = -lambda; sum = sum2 = ML_NEGINF; /* we need to renormalize here: the result could be very close to 1 */ for(i = 0; i < 110; pr += log(lambda) - log(++i)) { sum2 = logspace_add(sum2, pr); sum = logspace_add(sum, pr + pchisq(x, f+2*i, lower_tail, TRUE)); if (sum2 >= -1e-15) /*<=> EXP(sum2) >= 1-1e-15 */ break; } ans = sum - sum2; #ifdef DEBUG_pnch REprintf("pnchisq(x=%g, f=%g, th.=%g); th. < 80, logspace: i=%d, ans=(sum=%g)-(sum2=%g)\n", x,f,theta, i, (double)sum, (double)sum2); #endif return (double) (log_p ? ans : EXP(ans)); } else { LDOUBLE lambda = 0.5 * theta; LDOUBLE sum = 0, sum2 = 0, pr = EXP(-lambda); // does this need a feature test? /* we need to renormalize here: the result could be very close to 1 */ for(i = 0; i < 110; pr *= lambda/++i) { // pr == exp(-lambda) lambda^i / i! == dpois(i, lambda) sum2 += pr; // pchisq(*, i, *) is strictly decreasing to 0 for lower_tail=TRUE // and strictly increasing to 1 for lower_tail=FALSE sum += pr * pchisq(x, f+2*i, lower_tail, FALSE); if (sum2 >= 1-1e-15) break; } ans = sum/sum2; #ifdef DEBUG_pnch REprintf("pnchisq(x=%g, f=%g, theta=%g); theta < 80: i=%d, sum=%g, sum2=%g\n", x,f,theta, i, (double)sum, (double)sum2); #endif return (double) (log_p ? LOG(ans) : ans); } } // if(theta < 80) // else: theta == ncp >= 80 -------------------------------------------- #ifdef DEBUG_pnch REprintf("pnchisq(x=%g, f=%g, theta=%g >= 80): ",x,f,theta); #endif // Series expansion ------- FIXME: log_p=TRUE, lower_tail=FALSE only applied at end lam = .5 * theta; lamSml = (-lam < _dbl_min_exp); if(lamSml) { /* MATHLIB_ERROR( "non centrality parameter (= %g) too large for current algorithm", theta) */ u = 0; lu = -lam;/* == ln(u) */ l_lam = log(lam); } else { u = exp(-lam); } /* evaluate the first term */ v = u; x2 = .5 * x; f2 = .5 * f; f_x_2n = f - x; #ifdef DEBUG_pnch REprintf("-- v=exp(-th/2)=%g, x/2= %g, f/2= %g\n",v,x2,f2); #endif if(f2 * DBL_EPSILON > 0.125 && /* very large f and x ~= f: probably needs */ FABS(t = x2 - f2) < /* another algorithm anyway */ sqrt(DBL_EPSILON) * f2) { /* evade cancellation error */ /* t = exp((1 - t)*(2 - t/(f2 + 1))) / sqrt(2*M_PI*(f2 + 1));*/ lt = (1 - t)*(2 - t/(f2 + 1)) - M_LN_SQRT_2PI - 0.5 * log(f2 + 1); #ifdef DEBUG_pnch REprintf(" (case I) ==> "); #endif } else { /* Usual case 2: careful not to overflow .. : */ lt = f2*log(x2) -x2 - lgammafn(f2 + 1); } #ifdef DEBUG_pnch REprintf(" lt= %g", lt); #endif tSml = (lt < _dbl_min_exp); if(tSml) { #ifdef DEBUG_pnch REprintf(" is very small\n"); #endif if (x > f + theta + 5* sqrt( 2*(f + 2*theta))) { /* x > E[X] + 5* sigma(X) */ return R_DT_1; /* FIXME: could be more accurate than 0. */ } /* else */ l_x = log(x); ans = term = 0.; t = 0; } else { t = EXP(lt); #ifdef DEBUG_pnch REprintf(", t=exp(lt)= %g\n", t); #endif ans = term = (double) (v * t); } for (n = 1, f_2n = f + 2., f_x_2n += 2.; ; n++, f_2n += 2, f_x_2n += 2) { #ifdef DEBUG_pnch_n REprintf("\n _OL_: n=%d",n); #endif #ifndef MATHLIB_STANDALONE if(n % 1000) R_CheckUserInterrupt(); #endif /* f_2n === f + 2*n * f_x_2n === f - x + 2*n > 0 <==> (f+2n) > x */ if (f_x_2n > 0) { /* find the error bound and check for convergence */ bound = (double) (t * x / f_x_2n); #ifdef DEBUG_pnch_n REprintf("\n L10: n=%d; term= %g; bound= %g",n,term,bound); #endif is_r = is_it = FALSE; /* convergence only if BOTH absolute and relative error < 'bnd' */ if (((is_b = (bound <= errmax)) && (is_r = (term <= reltol * ans))) || (is_it = (n > itrmax))) { #ifdef DEBUG_pnch REprintf("BREAK n=%d %s; bound= %g %s, rel.err= %g %s\n", n, (is_it ? "> itrmax" : ""), bound, (is_b ? "<= errmax" : ""), term/ans, (is_r ? "<= reltol" : "")); #endif break; /* out completely */ } } /* evaluate the next term of the */ /* expansion and then the partial sum */ if(lamSml) { lu += l_lam - log(n); /* u = u* lam / n */ if(lu >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch_n REprintf(" n=%d; nomore underflow in u = exp(lu) ==> change\n", n); #endif v = u = EXP(lu); /* the first non-0 'u' */ lamSml = FALSE; } } else { u *= lam / n; v += u; } if(tSml) { lt += l_x - log(f_2n);/* t <- t * (x / f2n) */ if(lt >= _dbl_min_exp) { /* no underflow anymore ==> change regime */ #ifdef DEBUG_pnch REprintf(" n=%d; nomore underflow in t = exp(lt) ==> change\n", n); #endif t = EXP(lt); /* the first non-0 't' */ tSml = FALSE; } } else { t *= x / f_2n; } if(!lamSml && !tSml) { term = (double) (v * t); ans += term; } } /* for(n ...) */ if (is_it) { MATHLIB_WARNING2(_("pnchisq(x=%g, ..): not converged in %d iter."), x, itrmax); } #ifdef DEBUG_pnch REprintf("\n == L_End: n=%d; term= %g; bound=%g\n",n,term,bound); #endif double dans = (double) ans; return R_DT_val(dans); }
double F77_SUB(cdfchisq)(double *x, double *df, int *lower_tail, int *log_p) { return pchisq(*x, *df, *lower_tail, *log_p); }
double C_quadformConditionalPvalue(const double tstat, const double df) { return(pchisq(tstat, df, 0, 0)); }
/** * Computes the Hardy-Weinberg Likelihood Ratio Test Statistic * * @pls - PHRED genotype likelihoods * @no_samples - number of samples * @ploidy - ploidy * @no_alleles - number of alleles * @MLE_HWE_AF - estimated AF assuming HWE * @MLE_GF - estimated GF * @n - effective sample size * @lr - log10 likelihood ratio * @logp - likelihood ratio test log p-value * @df - degrees of freedom * */ void Estimator::compute_hwe_lrt(int32_t *pls, int32_t no_samples, int32_t ploidy, int32_t no_alleles, float *MLE_HWE_GF, float *MLE_GF, int32_t& n, float& lr, float& logp, int32_t& df) { n = 0; if (ploidy==2) { if (no_alleles==2) { int32_t no_genotypes = 3; float l0=0, la=0; for (size_t k=0; k<no_samples; ++k) { size_t offset = k*3; if (pls[offset]==bcf_int32_missing) continue; ++n; float p = lt->pl2prob(pls[offset]); float l0i = MLE_HWE_GF[0] * p; float lai = MLE_GF[0] * p; p = lt->pl2prob(pls[offset+1]); l0i += MLE_HWE_GF[1] * p; lai += MLE_GF[1] * p; p = lt->pl2prob(pls[offset+2]); l0i += MLE_HWE_GF[2] * p; lai += MLE_GF[2] * p; l0 += log(l0i); la += log(lai); } if (!n) return; lr = l0-la; float lrts = lr>0 ? 0 : -2*lr; df = 1; logp = pchisq(lrts, 1, 0, 1); } else { int32_t no_genotypes = bcf_an2gn(no_alleles); float l0=0, la=0; for (size_t k=0; k<no_samples; ++k) { size_t offset = k*no_genotypes; if (pls[offset]==bcf_int32_missing) continue; ++n; float l0i=0, lai=0; for (size_t j=0; j<no_genotypes; ++j) { float p = lt->pl2prob(pls[offset+j]); l0i += MLE_HWE_GF[j]*p; lai += MLE_GF[j]*p; } l0 += log(l0i); la += log(lai); } if (!n) return; lr = l0-la; float lrts = lr>0 ? 0 : -2*lr; df = no_genotypes-no_alleles; logp = pchisq(lrts, df, 0, 1); } } };