Exemplo n.º 1
0
/* parametric tests for discrete variables. */
static double ct_discrete(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests,
    double *pvalue, double *df, test_e test) {

int i = 0, llx = 0, lly = NLEVELS(yy), llz = 0;
int *xptr = NULL, *yptr = INTEGER(yy), *zptr = NULL;
double statistic = 0;
SEXP xdata, config;

  DISCRETE_CACHE();

  for (i = 0; i < ntests; i++) {

    DISCRETE_SWAP_X();

    if (test == MI || test == MI_ADF || test == X2 || test == X2_ADF) {

      /* mutual information and Pearson's X^2 asymptotic tests. */
      statistic = c_cchisqtest(xptr, llx, yptr, lly, zptr, llz, nobs, df, test);
      if ((test == MI) || (test == MI_ADF))
        statistic = 2 * nobs * statistic;
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_SH) {

      /* shrinkage mutual information test. */
      statistic = 2 * nobs * c_shcmi(xptr, llx, yptr, lly, zptr, llz,
                               nobs, df);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == JT) {

      /* Jonckheere-Terpstra test. */
      statistic = c_cjt(xptr, llx, yptr, lly, zptr, llz, nobs);
      pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

    }/*THEN*/

  }/*FOR*/

  UNPROTECT(1);

  return statistic;

}/*CT_DISCRETE*/
Exemplo n.º 2
0
/* compute all the pairwise mutual information coefficients between the variables. */
void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num,
    void *cond, int *clevels, double *means, double *sse, int *est) {

int i = 0, j = 0;

  switch(*est) {

    case DISCRETE_MAXIMUM_LIKELIHOOD:

      if (!cond) {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_chisqtest(((int **)columns)[i], nlevels[i],
                   ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*THEN*/
      else {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_cchisqtest(((int **)columns)[i], nlevels[i],
                    ((int **)columns)[j], nlevels[j],
                    (int *)cond, *clevels, *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*ELSE*/

      break;

    case GAUSSIAN_MAXIMUM_LIKELIHOOD:

      for (i = 0; i < dim; i++) {

        for (j = i + 1; j < dim; j++) {

          mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans(
            c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num,
              means[i], means[j], sse[i], sse[j]));

        }/*FOR*/

      }/*FOR*/

    break;

  }/*SWITCH*/

}/*MI_MATRIX*/
Exemplo n.º 3
0
/* conditional linear Gaussian mutual information test. */
static double ct_micg(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests,
    double *pvalue, double *df) {

int xtype = 0, ytype = TYPEOF(yy), *nlvls = NULL, llx = 0, lly = 0, llz = 0;
int ndp = 0, ngp = 0, nsx = length(zz), **dp = NULL, *dlvls = NULL, j = 0, k = 0;
int i = 0, *zptr = 0;
void *xptr = NULL, *yptr = NULL, **columns = NULL;
double **gp = NULL;
double statistic = 0;
SEXP xdata;

  if (ytype == INTSXP) {

    /* cache the number of levels. */
    lly = NLEVELS(yy);
    yptr = INTEGER(yy);

  }/*THEN*/
  else {

    yptr = REAL(yy);

  }/*ELSE*/

  /* extract the conditioning variables and cache their types. */
  columns = Calloc1D(nsx, sizeof(void *));
  nlvls = Calloc1D(nsx, sizeof(int));
  df2micg(zz, columns, nlvls, &ndp, &ngp);

  dp = Calloc1D(ndp + 1, sizeof(int *));
  gp = Calloc1D(ngp + 1, sizeof(double *));
  dlvls = Calloc1D(ndp + 1, sizeof(int));
  for (i = 0, j = 0, k = 0; i < nsx; i++)
    if (nlvls[i] > 0) {

      dlvls[1 + j] = nlvls[i];
      dp[1 + j++] = columns[i];

    }/*THEN*/
    else {

      gp[1 + k++] = columns[i];

    }/*ELSE*/

  /* allocate vector for the configurations of the discrete parents; or, if
   * there no discrete parents, for the means of the continuous parents. */
  if (ndp > 0) {

    zptr = Calloc1D(nobs, sizeof(int));
    c_fast_config(dp + 1, nobs, ndp, dlvls + 1, zptr, &llz, 1);

  }/*THEN*/

  for (i = 0; i < ntests; i++) {

    xdata = VECTOR_ELT(xx, i);
    xtype = TYPEOF(xdata);

    if (xtype == INTSXP) {

      xptr = INTEGER(xdata);
      llx = NLEVELS(xdata);

    }/*THEN*/
    else {

      xptr = REAL(xdata);

    }/*ELSE*/

    if ((ytype == INTSXP) && (xtype == INTSXP)) {

      if (ngp > 0) {

        /* need to reverse conditioning to actually compute the test. */
        statistic = 2 * nobs * nobs *
                      c_cmicg_unroll(xptr, llx, yptr, lly, zptr, llz,
                                 gp + 1, ngp, df, nobs);

      }/*THEN*/
      else {

        /* the test reverts back to a discrete mutual information test. */
        statistic = 2 * nobs * c_cchisqtest(xptr, llx, yptr, lly, zptr, llz,
                                 nobs, df, MI);

      }/*ELSE*/

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == REALSXP)) {

      gp[0] = xptr;
      statistic = 2 * nobs * c_cmicg(yptr, gp, ngp + 1, NULL, 0, zptr, llz,
                               dlvls, nobs);
      /* one regression coefficient for each conditioning level is added;
       * if all conditioning variables are continuous that's just one global
       * regression coefficient. */
      *df = (llz == 0) ? 1 : llz;

    }/*THEN*/
    else if ((ytype == INTSXP) && (xtype == REALSXP)) {

      dp[0] = yptr;
      dlvls[0] = lly;
      statistic = 2 * nobs * c_cmicg(xptr, gp + 1, ngp, dp, ndp + 1, zptr,
                               llz, dlvls, nobs);

      /* for each additional configuration of the discrete conditioning
       * variables plus the discrete yptr, one whole set of regression
       * coefficients (plus the intercept) is added. */
      *df = (lly - 1) * ((llz == 0) ? 1 : llz)  * (ngp + 1);

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == INTSXP)) {

      dp[0] = xptr;
      dlvls[0] = llx;
      statistic = 2 * nobs * c_cmicg(yptr, gp + 1, ngp, dp, ndp + 1, zptr,
                               llz, dlvls, nobs);
      /* same as above, with xptr and yptr swapped. */
      *df = (llx - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1);

    }/*ELSE*/

    pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

  }/*FOR*/

  Free1D(columns);
  Free1D(nlvls);
  Free1D(dlvls);
  Free1D(zptr);
  Free1D(dp);
  Free1D(gp);

  return statistic;

}/*CT_MICG*/