/* initialize a three-dimensional contingency table and the marginals. */ void fill_3d_table(int *xx, int *yy, int *zz, int ****n, int ***ni, int ***nj, int **nk, int llx, int lly, int llz, int num) { int i = 0, j = 0, k = 0; *n = alloc3dcont(llz, llx, lly); *ni = alloc2dcont(llz, llx); *nj = alloc2dcont(llz, lly); *nk = alloc1dcont(llz); /* compute the joint frequency of x, y, and z. */ for (k = 0; k < num; k++) (*n)[zz[k] - 1][xx[k] - 1][yy[k] - 1]++; /* compute the marginals. */ for (i = 0; i < llx; i++) for (j = 0; j < lly; j++) for (k = 0; k < llz; k++) { (*ni)[k][i] += (*n)[k][i][j]; (*nj)[k][j] += (*n)[k][i][j]; (*nk)[k] += (*n)[k][i][j]; }/*FOR*/ }/*FILL_3D_TABLE*/
/* unconditional mutual information, to be used in C code. */ double c_mi(int *xx, int *llx, int *yy, int *lly, int *num) { int i = 0, j = 0, k = 0; int **n = NULL, *ni = NULL, *nj = NULL; double res = 0; /* initialize the contingency table and the marginal frequencies. */ n = alloc2dcont(*llx, *lly); ni = alloc1dcont(*llx); nj = alloc1dcont(*lly); /* compute the joint frequency of x and y. */ for (k = 0; k < *num; k++) { n[xx[k] - 1][yy[k] - 1]++; }/*FOR*/ /* compute the marginals. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) { ni[i] += n[i][j]; nj[j] += n[i][j]; }/*FOR*/ /* compute the mutual information from the joint and marginal frequencies. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) res += MI_PART(n[i][j], ni[i], nj[j], *num); return (res)/(*num); }/*C_MI*/
/* conditional mutual information, to be used in C code. */ double c_cmi(int *xx, int *llx, int *yy, int *lly, int *zz, int *llz, int *num) { int i = 0, j = 0, k = 0; int ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL; double res = 0; /* initialize the contingency table and the marginal frequencies. */ n = alloc3dcont(*llx, *lly, *llz); ni = alloc2dcont(*llx, *llz); nj = alloc2dcont(*lly, *llz); nk = alloc1dcont(*llz); /* compute the joint frequency of x, y, and z. */ for (k = 0; k < *num; k++) { n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++; }/*FOR*/ /* compute the marginals. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) for (k = 0; k < *llz; k++) { ni[i][k] += n[i][j][k]; nj[j][k] += n[i][j][k]; nk[k] += n[i][j][k]; }/*FOR*/ /* compute the conditional mutual information from the joint and marginal frequencies. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) for (k = 0; k < *llz; k++) res += MI_PART(n[i][j][k], ni[i][k], nj[j][k], nk[k]); res = res/(*num); return res; }/*C_CMI*/
SEXP cdlik(SEXP x, SEXP y) { int i = 0, j = 0, k = 0; int **n = NULL, *nj = NULL; int llx = NLEVELS(x), lly = NLEVELS(y), num = LENGTH(x); int *xx = INTEGER(x), *yy = INTEGER(y); double *res = NULL; SEXP result; /* allocate and initialize result to zero. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* initialize the contingency table and the marginal frequencies. */ n = alloc2dcont(llx, lly); nj = alloc1dcont(lly); /* compute the joint frequency of x and y. */ for (k = 0; k < num; k++) { n[xx[k] - 1][yy[k] - 1]++; }/*FOR*/ /* compute the marginals. */ for (i = 0; i < llx; i++) for (j = 0; j < lly; j++) { nj[j] += n[i][j]; }/*FOR*/ /* compute the conditional entropy from the joint and marginal frequencies. */ for (i = 0; i < llx; i++) for (j = 0; j < lly; j++) { if (n[i][j] != 0) *res += (double)n[i][j] * log((double)n[i][j] / (double)nj[j]); }/*FOR*/ UNPROTECT(1); return result; }/*CDLIK*/
/* initialize a two-dimensional contingency table and the marginals. */ void fill_2d_table(int *xx, int *yy, int ***n, int **ni, int **nj, int llx, int lly, int num) { int i = 0, j = 0, k = 0; *n = alloc2dcont(llx, lly); *ni = alloc1dcont(llx); *nj = alloc1dcont(lly); /* compute the joint frequency of x and y. */ for (k = 0; k < num; k++) (*n)[xx[k] - 1][yy[k] - 1]++; /* compute the marginals. */ for (i = 0; i < llx; i++) for (j = 0; j < lly; j++) { (*ni)[i] += (*n)[i][j]; (*nj)[j] += (*n)[i][j]; }/*FOR*/ }/*FILL_2D_TABLE*/
/* conditional posterior dirichlet probability (covers BDe and K2 scores). */ SEXP cdpost(SEXP x, SEXP y, SEXP iss, SEXP exp, SEXP nparams) { int i = 0, j = 0, k = 0, imaginary = 0, num = LENGTH(x); int llx = NLEVELS(x), lly = NLEVELS(y), *xx = INTEGER(x), *yy = INTEGER(y); int **n = NULL, *nj = NULL; double alpha = 0, *res = NULL, *p = REAL(nparams); SEXP result; if (isNull(iss)) { /* this is for K2, which does not define an imaginary sample size; * all hyperparameters are set to 1 in the prior distribution. */ imaginary = (int) *p; alpha = 1; }/*THEN*/ else { /* this is for the BDe score. */ imaginary = INT(iss); alpha = (double) imaginary / *p; }/*ELSE*/ /* allocate and initialize result to zero. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* initialize the contingency table. */ n = alloc2dcont(llx, lly); nj = alloc1dcont(lly); /* compute the joint frequency of x and y. */ if (exp == R_NilValue) { for (i = 0; i < num; i++) { n[xx[i] - 1][yy[i] - 1]++; nj[yy[i] - 1]++; }/*FOR*/ }/*THEN*/ else { int *e = INTEGER(exp); for (i = 0, k = 0; i < num; i++) { if (i != e[k] - 1) { n[xx[i] - 1][yy[i] - 1]++; nj[yy[i] - 1]++; }/*THEN*/ else { k++; }/*ELSE*/ }/*FOR*/ /* adjust the sample size to match the number of observational data. */ num -= LENGTH(exp); }/*ELSE*/ /* compute the conditional posterior probability. */ for (i = 0; i < llx; i++) for (j = 0; j < lly; j++) *res += lgammafn(n[i][j] + alpha) - lgammafn(alpha); for (j = 0; j < lly; j++) *res += lgammafn((double)imaginary / lly) - lgammafn(nj[j] + (double)imaginary / lly); UNPROTECT(1); return result; }/*CDPOST*/
/* conditional Monte Carlo simulation for discrete tests. */ SEXP cmcarlo_mean(SEXP x, SEXP y, SEXP z, SEXP lx, SEXP ly, SEXP lz, SEXP length, SEXP samples, SEXP test) { double *fact = NULL, *res = NULL, observed = 0; int **n = NULL, **ncolt = NULL, **nrowt = NULL, *ncond = NULL, *workspace = NULL; int *num = INTEGER(length), *B = INTEGER(samples); int *nr = INTEGER(lx), *nc = INTEGER(ly), *nl = INTEGER(lz); int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z); int i = 0, j = 0, k = 0, npermuts = 0; SEXP result; /* allocate and initialize the result */ PROTECT(result = allocVector(REALSXP, 3)); res = REAL(result); res[0] = res[1] = res[2] = 0; // initial test score / mean score / nb permutations /* allocate and compute the factorials needed by rcont2. */ allocfact(*num); /* allocate and initialize the workspace for rcont2. */ workspace = alloc1dcont(*nc); /* initialize the contingency table. */ n = alloc2dcont(*nl, (*nr) * (*nc)); /* initialize the marginal frequencies. */ nrowt = alloc2dcont(*nl, *nr); ncolt = alloc2dcont(*nl, *nc); ncond = alloc1dcont(*nl); /* compute the joint frequency of x and y. */ for (k = 0; k < *num; k++) n[zz[k] - 1][CMC(xx[k] - 1, yy[k] - 1, *nr)]++; /* compute the marginals. */ for (i = 0; i < *nr; i++) for (j = 0; j < *nc; j++) for (k = 0; k < *nl; k++) { nrowt[k][i] += n[k][CMC(i, j, *nr)]; ncolt[k][j] += n[k][CMC(i, j, *nr)]; ncond[k] += n[k][CMC(i, j, *nr)]; }/*FOR*/ /* initialize the random number generator. */ GetRNGstate(); /* pick up the observed value of the test statistic, then generate a set of random contingency tables (given row and column totals) and adds their test scores to compute the mean.*/ switch(INT(test)) { case MUTUAL_INFORMATION: observed = 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl); for (j = 0; j < *B; j++) { for (k = 0; k < *nl; k++) rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]); res[1] += 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl); npermuts++; } break; case PEARSON_X2: observed = _cx2(n, nrowt, ncolt, ncond, nr, nc, nl); for (j = 0; j < *B; j++) { for (k = 0; k < *nl; k++) rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]); res[1] += _cx2(n, nrowt, ncolt, ncond, nr, nc, nl); npermuts++; } break; }/*SWITCH*/ PutRNGstate(); /* save the observed and mean values of the statistic, and the number of permutations performed. */ res[0] = observed; res[1] /= *B; // mean res[2] = npermuts; UNPROTECT(1); return result; }/*CMCARLO_MEAN*/