/* unconditional Gaussian mutual information, to be used in C code. */ double c_mig(double *xx, double *yy, int *num) { double cor = c_fast_cor(xx, yy, num); return - 0.5 * log(1 - cor * cor); }/*C_MIG*/
/* parametric tests for Gaussian variables. */ static double ut_gaustests(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0; double transform = 0, *xptr = NULL, *yptr = REAL(yy); double xm = 0, ym = 0, xsd = 0, ysd = 0, statistic = 0; /* compute the degrees of freedom for correlation and mutual information. */ if (test == COR) *df = nobs - 2; else if ((test == MI_G) || (test == MI_G_SH)) *df = 1; if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - 2 < 2))) { /* if there are not enough degrees of freedom, return independence. */ warning("trying to do an independence test with zero degrees of freedom."); *df = 0; statistic = 0; for (i = 0; i < ntests; i++) pvalue[i] = 1; return statistic; }/*THEN*/ /* cache mean and variance. */ ym = c_mean(yptr, nobs); ysd = c_sse(yptr, ym, nobs); for (i = 0; i < ntests; i++) { GAUSSIAN_SWAP_X(); statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd); if (test == COR) { transform = cor_t_trans(statistic, *df); pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G) { statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_G_SH) { statistic *= 1 - cor_lambda(xptr, yptr, nobs, xm, ym, xsd, ysd, statistic); statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == ZF) { statistic = cor_zf_trans(statistic, (double)nobs - 2); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ return statistic; }/*UT_GAUSTESTS*/
/* conditional linear Gaussian mutual information test. */ static double ut_micg(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df) { int i = 0, xtype = 0, ytype = TYPEOF(yy), llx = 0, lly = 0; double xm = 0, xsd = 0, ym = 0, ysd = 0, statistic = 0; void *xptr = NULL, *yptr = NULL; SEXP xdata; if (ytype == INTSXP) { /* cache the number of levels. */ lly = NLEVELS(yy); yptr = INTEGER(yy); }/*THEN*/ else { /* cache mean and variance. */ yptr = REAL(yy); ym = c_mean(yptr, nobs); ysd = c_sse(yptr, ym, nobs); }/*ELSE*/ for (i = 0; i < ntests; i++) { xdata = VECTOR_ELT(xx, i); xtype = TYPEOF(xdata); if ((ytype == INTSXP) && (xtype == INTSXP)) { /* if both nodes are discrete, the test reverts back to a discrete * mutual information test. */ xptr = INTEGER(xdata); llx = NLEVELS(xdata); DISCRETE_SWAP_X(); statistic = 2 * nobs * c_chisqtest(xptr, llx, yptr, lly, nobs, df, MI); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if ((ytype == REALSXP) && (xtype == REALSXP)) { /* if both nodes are continuous, the test reverts back to a Gaussian * mutual information test. */ xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = c_sse(xptr, xm, nobs); statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd); *df = 1; statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { if (xtype == INTSXP) { xptr = INTEGER(xdata); llx = NLEVELS(xdata); ysd = sqrt(ysd / (nobs - 1)); statistic = 2 * nobs * c_micg(yptr, ym, ysd, xptr, llx, nobs); *df = llx - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = sqrt(c_sse(xptr, xm, nobs) / (nobs - 1)); statistic = 2 * nobs * c_micg(xptr, xm, xsd, yptr, lly, nobs); *df = lly - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*ELSE*/ }/*THEN*/ }/*FOR*/ return statistic; }/*UT_MICG*/
/* compute all the pairwise mutual information coefficients between the variables. */ void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num, void *cond, int *clevels, double *means, double *sse, int *est) { int i = 0, j = 0; switch(*est) { case DISCRETE_MAXIMUM_LIKELIHOOD: if (!cond) { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_chisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*THEN*/ else { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_cchisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], (int *)cond, *clevels, *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*ELSE*/ break; case GAUSSIAN_MAXIMUM_LIKELIHOOD: for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans( c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num, means[i], means[j], sse[i], sse[j])); }/*FOR*/ }/*FOR*/ break; }/*SWITCH*/ }/*MI_MATRIX*/