Ejemplo n.º 1
0
/* unconditional Gaussian mutual information, to be used in C code. */
double c_mig(double *xx, double *yy, int *num) {

double cor = c_fast_cor(xx, yy, num);

  return - 0.5 * log(1 - cor * cor);

}/*C_MIG*/
Ejemplo n.º 2
0
/* parametric tests for Gaussian variables. */
static double ut_gaustests(SEXP xx, SEXP yy, int nobs, int ntests,
    double *pvalue, double *df, test_e test) {

int i = 0;
double transform = 0, *xptr = NULL, *yptr = REAL(yy);
double xm = 0, ym = 0, xsd = 0, ysd = 0, statistic = 0;

  /* compute the degrees of freedom for correlation and mutual information. */
  if (test == COR)
    *df = nobs - 2;
  else if ((test == MI_G) || (test == MI_G_SH))
    *df = 1;

  if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - 2 < 2))) {

    /* if there are not enough degrees of freedom, return independence. */
    warning("trying to do an independence test with zero degrees of freedom.");

    *df = 0;
    statistic = 0;
    for (i = 0; i < ntests; i++)
      pvalue[i] = 1;

     return statistic;

  }/*THEN*/

  /* cache mean and variance. */
  ym = c_mean(yptr, nobs);
  ysd = c_sse(yptr, ym, nobs);

  for (i = 0; i < ntests; i++) {

    GAUSSIAN_SWAP_X();
    statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd);

    if (test == COR) {

      transform = cor_t_trans(statistic, *df);
      pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G) {

      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G_SH) {

      statistic *= 1 - cor_lambda(xptr, yptr, nobs, xm, ym, xsd, ysd, statistic);
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == ZF) {

      statistic = cor_zf_trans(statistic, (double)nobs - 2);
      pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

    }/*THEN*/

  }/*FOR*/

  return statistic;

}/*UT_GAUSTESTS*/
Ejemplo n.º 3
0
/* conditional linear Gaussian mutual information test. */
static double ut_micg(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue,
  double *df) {

int i = 0, xtype = 0, ytype = TYPEOF(yy), llx = 0, lly = 0;
double xm = 0, xsd = 0, ym = 0, ysd = 0, statistic = 0;
void *xptr = NULL, *yptr = NULL;
SEXP xdata;

  if (ytype == INTSXP) {

    /* cache the number of levels. */
    lly = NLEVELS(yy);
    yptr = INTEGER(yy);

  }/*THEN*/
  else {

    /* cache mean and variance. */
    yptr = REAL(yy);
    ym = c_mean(yptr, nobs);
    ysd = c_sse(yptr, ym, nobs);

  }/*ELSE*/

  for (i = 0; i < ntests; i++) {

    xdata = VECTOR_ELT(xx, i);
    xtype = TYPEOF(xdata);

    if ((ytype == INTSXP) && (xtype == INTSXP)) {

      /* if both nodes are discrete, the test reverts back to a discrete
       * mutual information test. */
      xptr = INTEGER(xdata);
      llx = NLEVELS(xdata);
      DISCRETE_SWAP_X();
      statistic = 2 * nobs * c_chisqtest(xptr, llx, yptr, lly, nobs, df, MI);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == REALSXP)) {

      /* if both nodes are continuous, the test reverts back to a Gaussian
       * mutual information test. */
      xptr = REAL(xdata);
      xm = c_mean(xptr, nobs);
      xsd = c_sse(xptr, xm, nobs);
      statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd);
      *df = 1;
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else {

      if (xtype == INTSXP) {

        xptr = INTEGER(xdata);
        llx = NLEVELS(xdata);
        ysd = sqrt(ysd / (nobs - 1));
        statistic = 2 * nobs * c_micg(yptr, ym, ysd, xptr, llx, nobs);
        *df = llx - 1;
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else {

        xptr = REAL(xdata);
        xm = c_mean(xptr, nobs);
        xsd = sqrt(c_sse(xptr, xm, nobs) / (nobs - 1));
        statistic = 2 * nobs * c_micg(xptr, xm, xsd, yptr, lly, nobs);
        *df = lly - 1;
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*ELSE*/

    }/*THEN*/

  }/*FOR*/

  return statistic;

}/*UT_MICG*/
Ejemplo n.º 4
0
/* compute all the pairwise mutual information coefficients between the variables. */
void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num,
    void *cond, int *clevels, double *means, double *sse, int *est) {

int i = 0, j = 0;

  switch(*est) {

    case DISCRETE_MAXIMUM_LIKELIHOOD:

      if (!cond) {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_chisqtest(((int **)columns)[i], nlevels[i],
                   ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*THEN*/
      else {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_cchisqtest(((int **)columns)[i], nlevels[i],
                    ((int **)columns)[j], nlevels[j],
                    (int *)cond, *clevels, *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*ELSE*/

      break;

    case GAUSSIAN_MAXIMUM_LIKELIHOOD:

      for (i = 0; i < dim; i++) {

        for (j = i + 1; j < dim; j++) {

          mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans(
            c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num,
              means[i], means[j], sse[i], sse[j]));

        }/*FOR*/

      }/*FOR*/

    break;

  }/*SWITCH*/

}/*MI_MATRIX*/