/* parametric tests for discrete variables. */ static double ct_discrete(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests, double *pvalue, double *df, test_e test) { int i = 0, llx = 0, lly = NLEVELS(yy), llz = 0; int *xptr = NULL, *yptr = INTEGER(yy), *zptr = NULL; double statistic = 0; SEXP xdata, config; DISCRETE_CACHE(); for (i = 0; i < ntests; i++) { DISCRETE_SWAP_X(); if (test == MI || test == MI_ADF || test == X2 || test == X2_ADF) { /* mutual information and Pearson's X^2 asymptotic tests. */ statistic = c_cchisqtest(xptr, llx, yptr, lly, zptr, llz, nobs, df, test); if ((test == MI) || (test == MI_ADF)) statistic = 2 * nobs * statistic; pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == MI_SH) { /* shrinkage mutual information test. */ statistic = 2 * nobs * c_shcmi(xptr, llx, yptr, lly, zptr, llz, nobs, df); pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*THEN*/ else if (test == JT) { /* Jonckheere-Terpstra test. */ statistic = c_cjt(xptr, llx, yptr, lly, zptr, llz, nobs); pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE); }/*THEN*/ }/*FOR*/ UNPROTECT(1); return statistic; }/*CT_DISCRETE*/
/* compute all the pairwise mutual information coefficients between the variables. */ void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num, void *cond, int *clevels, double *means, double *sse, int *est) { int i = 0, j = 0; switch(*est) { case DISCRETE_MAXIMUM_LIKELIHOOD: if (!cond) { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_chisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*THEN*/ else { for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = c_cchisqtest(((int **)columns)[i], nlevels[i], ((int **)columns)[j], nlevels[j], (int *)cond, *clevels, *num, NULL, MI, FALSE); }/*FOR*/ }/*FOR*/ }/*ELSE*/ break; case GAUSSIAN_MAXIMUM_LIKELIHOOD: for (i = 0; i < dim; i++) { for (j = i + 1; j < dim; j++) { mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans( c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num, means[i], means[j], sse[i], sse[j])); }/*FOR*/ }/*FOR*/ break; }/*SWITCH*/ }/*MI_MATRIX*/
/* conditional linear Gaussian mutual information test. */ static double ct_micg(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests, double *pvalue, double *df) { int xtype = 0, ytype = TYPEOF(yy), *nlvls = NULL, llx = 0, lly = 0, llz = 0; int ndp = 0, ngp = 0, nsx = length(zz), **dp = NULL, *dlvls = NULL, j = 0, k = 0; int i = 0, *zptr = 0; void *xptr = NULL, *yptr = NULL, **columns = NULL; double **gp = NULL; double statistic = 0; SEXP xdata; if (ytype == INTSXP) { /* cache the number of levels. */ lly = NLEVELS(yy); yptr = INTEGER(yy); }/*THEN*/ else { yptr = REAL(yy); }/*ELSE*/ /* extract the conditioning variables and cache their types. */ columns = Calloc1D(nsx, sizeof(void *)); nlvls = Calloc1D(nsx, sizeof(int)); df2micg(zz, columns, nlvls, &ndp, &ngp); dp = Calloc1D(ndp + 1, sizeof(int *)); gp = Calloc1D(ngp + 1, sizeof(double *)); dlvls = Calloc1D(ndp + 1, sizeof(int)); for (i = 0, j = 0, k = 0; i < nsx; i++) if (nlvls[i] > 0) { dlvls[1 + j] = nlvls[i]; dp[1 + j++] = columns[i]; }/*THEN*/ else { gp[1 + k++] = columns[i]; }/*ELSE*/ /* allocate vector for the configurations of the discrete parents; or, if * there no discrete parents, for the means of the continuous parents. */ if (ndp > 0) { zptr = Calloc1D(nobs, sizeof(int)); c_fast_config(dp + 1, nobs, ndp, dlvls + 1, zptr, &llz, 1); }/*THEN*/ for (i = 0; i < ntests; i++) { xdata = VECTOR_ELT(xx, i); xtype = TYPEOF(xdata); if (xtype == INTSXP) { xptr = INTEGER(xdata); llx = NLEVELS(xdata); }/*THEN*/ else { xptr = REAL(xdata); }/*ELSE*/ if ((ytype == INTSXP) && (xtype == INTSXP)) { if (ngp > 0) { /* need to reverse conditioning to actually compute the test. */ statistic = 2 * nobs * nobs * c_cmicg_unroll(xptr, llx, yptr, lly, zptr, llz, gp + 1, ngp, df, nobs); }/*THEN*/ else { /* the test reverts back to a discrete mutual information test. */ statistic = 2 * nobs * c_cchisqtest(xptr, llx, yptr, lly, zptr, llz, nobs, df, MI); }/*ELSE*/ }/*THEN*/ else if ((ytype == REALSXP) && (xtype == REALSXP)) { gp[0] = xptr; statistic = 2 * nobs * c_cmicg(yptr, gp, ngp + 1, NULL, 0, zptr, llz, dlvls, nobs); /* one regression coefficient for each conditioning level is added; * if all conditioning variables are continuous that's just one global * regression coefficient. */ *df = (llz == 0) ? 1 : llz; }/*THEN*/ else if ((ytype == INTSXP) && (xtype == REALSXP)) { dp[0] = yptr; dlvls[0] = lly; statistic = 2 * nobs * c_cmicg(xptr, gp + 1, ngp, dp, ndp + 1, zptr, llz, dlvls, nobs); /* for each additional configuration of the discrete conditioning * variables plus the discrete yptr, one whole set of regression * coefficients (plus the intercept) is added. */ *df = (lly - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1); }/*THEN*/ else if ((ytype == REALSXP) && (xtype == INTSXP)) { dp[0] = xptr; dlvls[0] = llx; statistic = 2 * nobs * c_cmicg(yptr, gp + 1, ngp, dp, ndp + 1, zptr, llz, dlvls, nobs); /* same as above, with xptr and yptr swapped. */ *df = (llx - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1); }/*ELSE*/ pvalue[i] = pchisq(statistic, *df, FALSE, FALSE); }/*FOR*/ Free1D(columns); Free1D(nlvls); Free1D(dlvls); Free1D(zptr); Free1D(dp); Free1D(gp); return statistic; }/*CT_MICG*/