/* unconditional mutual information, to be used for the asymptotic test. */ SEXP mi(SEXP x, SEXP y, SEXP gsquare, SEXP adjusted) { int llx = NLEVELS(x), lly = NLEVELS(y), num = length(x); int *xx = INTEGER(x), *yy = INTEGER(y); double *res = NULL; SEXP result; PROTECT(result = allocVector(REALSXP, 2)); res = REAL(result); if (isTRUE(adjusted)) res[0] = c_chisqtest(xx, llx, yy, lly, num, res + 1, MI_ADF); else res[0] = c_chisqtest(xx, llx, yy, lly, num, res + 1, MI); /* rescale to match the G^2 test. */ if (isTRUE(gsquare)) res[0] *= 2 * num; UNPROTECT(1); return result; }/*MI*/
/* parametric tests for discrete variables. */ static double ut_discrete(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0, llx = 0, lly = NLEVELS(yy), *xptr = NULL, *yptr = INTEGER(yy); double statistic = 0; SEXP xdata; for (i = 0; i < ntests; i++) { DISCRETE_SWAP_X(); if (test == MI || test == MI_ADF || test == X2 || test == X2_ADF) { /* mutual information and Pearson's X^2 asymptotic tests. */ statistic = c_chisqtest(xptr, llx, yptr, lly, nobs, df, test); if ((test == MI) || (test == MI_ADF)) statistic = 2 * nobs * statistic; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_SH) { /* shrinkage mutual information test. */ statistic = 2 * nobs * c_shmi(xptr, llx, yptr, lly, nobs); *df = ((double)(llx - 1) * (double)(lly - 1)); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == JT) { /* Jonckheere-Terpstra test. */ statistic = c_jt(xptr, llx, yptr, lly, nobs); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ return statistic; }/*UT_DISCRETE*/
/* conditional linear Gaussian mutual information test. */ static double ut_micg(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue, double *df) { int i = 0, xtype = 0, ytype = TYPEOF(yy), llx = 0, lly = 0; double xm = 0, xsd = 0, ym = 0, ysd = 0, statistic = 0; void *xptr = NULL, *yptr = NULL; SEXP xdata; if (ytype == INTSXP) { /* cache the number of levels. */ lly = NLEVELS(yy); yptr = INTEGER(yy); }/*THEN*/ else { /* cache mean and variance. */ yptr = REAL(yy); ym = c_mean(yptr, nobs); ysd = c_sse(yptr, ym, nobs); }/*ELSE*/ for (i = 0; i < ntests; i++) { xdata = VECTOR_ELT(xx, i); xtype = TYPEOF(xdata); if ((ytype == INTSXP) && (xtype == INTSXP)) { /* if both nodes are discrete, the test reverts back to a discrete * mutual information test. */ xptr = INTEGER(xdata); llx = NLEVELS(xdata); DISCRETE_SWAP_X(); statistic = 2 * nobs * c_chisqtest(xptr, llx, yptr, lly, nobs, df, MI); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if ((ytype == REALSXP) && (xtype == REALSXP)) { /* if both nodes are continuous, the test reverts back to a Gaussian * mutual information test. */ xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = c_sse(xptr, xm, nobs); statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd); *df = 1; statistic = 2 * nobs * cor_mi_trans(statistic); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { if (xtype == INTSXP) { xptr = INTEGER(xdata); llx = NLEVELS(xdata); ysd = sqrt(ysd / (nobs - 1)); statistic = 2 * nobs * c_micg(yptr, ym, ysd, xptr, llx, nobs); *df = llx - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else { xptr = REAL(xdata); xm = c_mean(xptr, nobs); xsd = sqrt(c_sse(xptr, xm, nobs) / (nobs - 1)); statistic = 2 * nobs * c_micg(xptr, xm, xsd, yptr, lly, nobs); *df = lly - 1; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*ELSE*/ }/*THEN*/ }/*FOR*/ return statistic; }/*UT_MICG*/
/* compute all the pairwise mutual information coefficients between the variables. */ void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num, void *cond, int *clevels, double *means, double *sse, int *est) { int i = 0, j = 0; switch(*est) { case DISCRETE_MAXIMUM_LIKELIHOOD: if (!cond) { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_chisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*THEN*/ else { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_cchisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], (int *)cond, *clevels, *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*ELSE*/ break; case GAUSSIAN_MAXIMUM_LIKELIHOOD: for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans( c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num, means[i], means[j], sse[i], sse[j])); }/*FOR*/ }/*FOR*/ break; }/*SWITCH*/ }/*MI_MATRIX*/