Ejemplo n.º 1
0
/* initialize a three-dimensional contingency table and the marginals. */
void fill_3d_table(int *xx, int *yy, int *zz, int ****n, int ***ni, int ***nj,
    int **nk, int llx, int lly, int llz, int num) {

int i = 0, j = 0, k = 0;

  *n = alloc3dcont(llz, llx, lly);
  *ni = alloc2dcont(llz, llx);
  *nj = alloc2dcont(llz, lly);
  *nk = alloc1dcont(llz);

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    (*n)[zz[k] - 1][xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        (*ni)[k][i] += (*n)[k][i][j];
        (*nj)[k][j] += (*n)[k][i][j];
        (*nk)[k] += (*n)[k][i][j];

      }/*FOR*/

}/*FILL_3D_TABLE*/
Ejemplo n.º 2
0
/* unconditional mutual information, to be used in C code. */
double c_mi(int *xx, int *llx, int *yy, int *lly, int *num) {

int i = 0, j = 0, k = 0;
int  **n = NULL, *ni = NULL, *nj = NULL;
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc2dcont(*llx, *lly);
  ni = alloc1dcont(*llx);
  nj = alloc1dcont(*lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++) {

    ni[i] += n[i][j];
    nj[j] += n[i][j];

  }/*FOR*/

  /* compute the mutual information from the joint and marginal frequencies. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      res += MI_PART(n[i][j], ni[i], nj[j], *num);

  return (res)/(*num);

}/*C_MI*/
Ejemplo n.º 3
0
/* conditional mutual information, to be used in C code. */
double c_cmi(int *xx, int *llx, int *yy, int *lly, int *zz, int *llz, int *num) {

int i = 0, j = 0, k = 0;
int ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL;
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc3dcont(*llx, *lly, *llz);
  ni = alloc2dcont(*llx, *llz);
  nj = alloc2dcont(*lly, *llz);
  nk = alloc1dcont(*llz);

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++) {

        ni[i][k] += n[i][j][k];
        nj[j][k] += n[i][j][k];
        nk[k] += n[i][j][k];

      }/*FOR*/

  /* compute the conditional mutual information from the joint and
     marginal frequencies. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++)
        res += MI_PART(n[i][j][k], ni[i][k], nj[j][k], nk[k]);

  res = res/(*num);

  return res;

}/*C_CMI*/
SEXP cdlik(SEXP x, SEXP y) {

int i = 0, j = 0, k = 0;
int **n = NULL, *nj = NULL;
int llx = NLEVELS(x), lly = NLEVELS(y), num = LENGTH(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc2dcont(llx, lly);
  nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++) {

    n[xx[k] - 1][yy[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

      nj[j] += n[i][j];

    }/*FOR*/

  /* compute the conditional entropy from the joint and marginal
       frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

      if (n[i][j] != 0)
        *res += (double)n[i][j] * log((double)n[i][j] / (double)nj[j]);

    }/*FOR*/

  UNPROTECT(1);

  return result;

}/*CDLIK*/
Ejemplo n.º 5
0
/* initialize a two-dimensional contingency table and the marginals. */
void fill_2d_table(int *xx, int *yy, int ***n, int **ni, int **nj, int llx,
    int lly, int num) {

int i = 0, j = 0, k = 0;

  *n = alloc2dcont(llx, lly);
  *ni = alloc1dcont(llx);
  *nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    (*n)[xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

    (*ni)[i] += (*n)[i][j];
    (*nj)[j] += (*n)[i][j];

  }/*FOR*/

}/*FILL_2D_TABLE*/
Ejemplo n.º 6
0
/* conditional posterior dirichlet probability (covers BDe and K2 scores). */
SEXP cdpost(SEXP x, SEXP y, SEXP iss, SEXP exp, SEXP nparams) {

int i = 0, j = 0, k = 0, imaginary = 0, num = LENGTH(x);
int llx = NLEVELS(x), lly = NLEVELS(y), *xx = INTEGER(x), *yy = INTEGER(y);
int **n = NULL, *nj = NULL;
double alpha = 0, *res = NULL, *p = REAL(nparams);
SEXP result;

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = (int) *p;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INT(iss);
    alpha = (double) imaginary / *p;

  }/*ELSE*/

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table. */
  n = alloc2dcont(llx, lly);
  nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++) {

      n[xx[i] - 1][yy[i] - 1]++;
      nj[yy[i] - 1]++;

    }/*FOR*/

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {

      if (i != e[k] - 1) {

        n[xx[i] - 1][yy[i] - 1]++;
        nj[yy[i] - 1]++;

      }/*THEN*/
      else {

        k++;

      }/*ELSE*/

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= LENGTH(exp);

  }/*ELSE*/

  /* compute the conditional posterior probability. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      *res += lgammafn(n[i][j] + alpha) - lgammafn(alpha);
  for (j = 0; j < lly; j++)
    *res += lgammafn((double)imaginary / lly) -
              lgammafn(nj[j] + (double)imaginary / lly);

  UNPROTECT(1);
  return result;

}/*CDPOST*/
Ejemplo n.º 7
0
/* conditional Monte Carlo simulation for discrete tests. */
SEXP cmcarlo_mean(SEXP x, SEXP y, SEXP z, SEXP lx, SEXP ly, SEXP lz,
    SEXP length, SEXP samples, SEXP test) {

double *fact = NULL, *res = NULL, observed = 0;
int **n = NULL, **ncolt = NULL, **nrowt = NULL, *ncond = NULL, *workspace = NULL;
int *num = INTEGER(length), *B = INTEGER(samples);
int *nr = INTEGER(lx), *nc = INTEGER(ly), *nl = INTEGER(lz);
int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z);
int i = 0, j = 0, k = 0, npermuts = 0;
SEXP result;

  /* allocate and initialize the result */
  PROTECT(result = allocVector(REALSXP, 3));
  res = REAL(result);
  res[0] = res[1] = res[2] = 0; // initial test score / mean score / nb permutations

  /* allocate and compute the factorials needed by rcont2. */
  allocfact(*num);

  /* allocate and initialize the workspace for rcont2. */
  workspace = alloc1dcont(*nc);

  /* initialize the contingency table. */
  n = alloc2dcont(*nl, (*nr) * (*nc));

  /* initialize the marginal frequencies. */
  nrowt = alloc2dcont(*nl, *nr);
  ncolt = alloc2dcont(*nl, *nc);
  ncond = alloc1dcont(*nl);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++)
    n[zz[k] - 1][CMC(xx[k] - 1, yy[k] - 1, *nr)]++;

  /* compute the marginals. */
  for (i = 0; i < *nr; i++)
    for (j = 0; j < *nc; j++)
      for (k = 0; k < *nl; k++) {

        nrowt[k][i] += n[k][CMC(i, j, *nr)];
        ncolt[k][j] += n[k][CMC(i, j, *nr)];
        ncond[k] += n[k][CMC(i, j, *nr)];

      }/*FOR*/

  /* initialize the random number generator. */
  GetRNGstate();
  
  /* pick up the observed value of the test statistic, then generate a set of
     random contingency tables (given row and column totals) and adds their
     test scores to compute the mean.*/
  switch(INT(test)) {

    case MUTUAL_INFORMATION:
      observed = 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl);
      
      for (j = 0; j < *B; j++) {
      
        for (k = 0; k < *nl; k++)
          rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]);
          
        res[1] += 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl);
        npermuts++;
      }

      break;

    case PEARSON_X2:
      observed = _cx2(n, nrowt, ncolt, ncond, nr, nc, nl);
      
      for (j = 0; j < *B; j++) {
      
        for (k = 0; k < *nl; k++)
          rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]);
          
        res[1] += _cx2(n, nrowt, ncolt, ncond, nr, nc, nl);
        npermuts++;
      }

      break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed and mean values of the statistic,
     and the number of permutations performed. */
  res[0] = observed;
  res[1] /= *B; // mean
  res[2] = npermuts;

  UNPROTECT(1);

  return result;

}/*CMCARLO_MEAN*/