/* initialize a three-dimensional contingency table and the marginals. */
void fill_3d_table(int *xx, int *yy, int *zz, int ****n, int ***ni, int ***nj,
    int **nk, int llx, int lly, int llz, int num) {

int i = 0, j = 0, k = 0;

  *n = (int ***) Calloc3D(llz, llx, lly, sizeof(int));
  *ni = (int **) Calloc2D(llz, llx, sizeof(int));
  *nj = (int **) Calloc2D(llz, lly, sizeof(int));
  *nk = (int *) Calloc1D(llz, sizeof(int));

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    (*n)[zz[k] - 1][xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        (*ni)[k][i] += (*n)[k][i][j];
        (*nj)[k][j] += (*n)[k][i][j];
        (*nk)[k] += (*n)[k][i][j];

      }/*FOR*/

}/*FILL_3D_TABLE*/
Example #2
0
/* shrinked mutual information, to be used in C code. */
double c_shmi(int *xx, int llx, int *yy, int lly, int num) {

int i = 0, j = 0, k = 0;
double **n = NULL, *ni = NULL, *nj = NULL;
double lambda = 0, target = 1/(double)(llx * lly);
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = (double **) Calloc2D(llx, lly, sizeof(double));
  ni = Calloc1D(llx, sizeof(double));
  nj = Calloc1D(lly, sizeof(double));

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1]++;

  /* estimate the optimal lambda for the data. */
  mi_lambda((double *)n, &lambda, target, num, llx, lly, 0);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
        n[i][j] = lambda * target + (1 - lambda) * n[i][j] / num;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

    ni[i] += n[i][j];
    nj[j] += n[i][j];

  }/*FOR*/

  /* compute the mutual information from the joint and marginal frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      if (n[i][j] != 0)
        res += n[i][j] * log(n[i][j] / (ni[i] * nj[j]));

  Free1D(ni);
  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*C_SHMI*/
Example #3
0
double cdlik(SEXP x, SEXP y, double *nparams) {

int i = 0, j = 0, k = 0;
int **n = NULL, *nj = NULL;
int llx = NLEVELS(x), lly = NLEVELS(y), num = length(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = (int **) Calloc2D(llx, lly, sizeof(int));
  nj = Calloc1D(lly, sizeof(int));

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      nj[j] += n[i][j];

  /* compute the conditional entropy from the joint and marginal
       frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      if (n[i][j] != 0)
        res += (double)n[i][j] * log((double)n[i][j] / (double)nj[j]);

  /* we may want to store the number of parameters. */
  if (nparams)
    *nparams = (llx - 1) * lly;

  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*CDLIK*/
Example #4
0
/* shrinked conditional mutual information, to be used in C code. */
double c_shcmi(int *xx, int llx, int *yy, int lly, int *zz, int llz,
    int num, double *df) {

int i = 0, j = 0, k = 0;
double ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL;
double lambda = 0, target = 1/(double)(llx * lly * llz);
double res = 0;

  /* compute the degrees of freedom. */
  *df = (double)(llx - 1) * (double)(lly - 1) * (double)(llz);

  /* initialize the contingency table and the marginal frequencies. */
  n = (double ***) Calloc3D(llx, lly, llz, sizeof(double));
  ni = (double **) Calloc2D(llx, llz, sizeof(double));
  nj = (double **) Calloc2D(lly, llz, sizeof(double));
  nk = Calloc1D(llz, sizeof(double));

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++;

  /* estimate the optimal lambda for the data. */
  mi_lambda((double *)n, &lambda, target, num, llx, lly, llz);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++)
        n[i][j][k] = lambda * target + (1 - lambda) * n[i][j][k] / num;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        ni[i][k] += n[i][j][k];
        nj[j][k] += n[i][j][k];
        nk[k] += n[i][j][k];

      }/*FOR*/

  for (k = 0; k < llz; k++) {

    /* check each level of the conditioning variable to avoid (again)
     * "divide by zero" errors. */
    if (nk[k] == 0)
      continue;

    for (j = 0; j < lly; j++) {

      for (i = 0; i < llx; i++) {

        if (n[i][j][k] > 0)
          res += n[i][j][k] * log( (n[i][j][k] * nk[k]) / (ni[i][k] * nj[j][k]) );

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  Free1D(nk);
  Free2D(ni, llx);
  Free2D(nj, lly);
  Free3D(n, llx, lly);

  return res;

}/*C_SHCMI*/
/* conditional posterior Dirichlet probability (covers BDe and K2 scores). */
double cdpost(SEXP x, SEXP y, SEXP iss, SEXP exp) {

int i = 0, j = 0, k = 0, imaginary = 0, num = length(x);
int llx = NLEVELS(x), lly = NLEVELS(y), p = llx * lly;
int *xx = INTEGER(x), *yy = INTEGER(y), **n = NULL, *nj = NULL;
double alpha = 0, res = 0;

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = p;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INT(iss);
    alpha = ((double) imaginary) / ((double) p);

  }/*ELSE*/

  /* initialize the contingency table. */
  n = (int **) Calloc2D(llx, lly, sizeof(int));
  nj = Calloc1D(lly, sizeof(int));

  /* compute the joint frequency of x and y. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++) {

      n[xx[i] - 1][yy[i] - 1]++;
      nj[yy[i] - 1]++;

    }/*FOR*/

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {

      if (i != e[k] - 1) {

        n[xx[i] - 1][yy[i] - 1]++;
        nj[yy[i] - 1]++;

      }/*THEN*/
      else {

        k++;

      }/*ELSE*/

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= length(exp);

  }/*ELSE*/

  /* compute the conditional posterior probability. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      res += lgammafn(n[i][j] + alpha) - lgammafn(alpha);
  for (j = 0; j < lly; j++)
    res += lgammafn((double)imaginary / lly) -
              lgammafn(nj[j] + (double)imaginary / lly);

  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*CDPOST*/