示例#1
0
/* parametric tests for Gaussian variables. */
static double ut_gaustests(SEXP xx, SEXP yy, int nobs, int ntests,
    double *pvalue, double *df, test_e test) {

int i = 0;
double transform = 0, *xptr = NULL, *yptr = REAL(yy);
double xm = 0, ym = 0, xsd = 0, ysd = 0, statistic = 0;

  /* compute the degrees of freedom for correlation and mutual information. */
  if (test == COR)
    *df = nobs - 2;
  else if ((test == MI_G) || (test == MI_G_SH))
    *df = 1;

  if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - 2 < 2))) {

    /* if there are not enough degrees of freedom, return independence. */
    warning("trying to do an independence test with zero degrees of freedom.");

    *df = 0;
    statistic = 0;
    for (i = 0; i < ntests; i++)
      pvalue[i] = 1;

     return statistic;

  }/*THEN*/

  /* cache mean and variance. */
  ym = c_mean(yptr, nobs);
  ysd = c_sse(yptr, ym, nobs);

  for (i = 0; i < ntests; i++) {

    GAUSSIAN_SWAP_X();
    statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd);

    if (test == COR) {

      transform = cor_t_trans(statistic, *df);
      pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G) {

      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G_SH) {

      statistic *= 1 - cor_lambda(xptr, yptr, nobs, xm, ym, xsd, ysd, statistic);
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == ZF) {

      statistic = cor_zf_trans(statistic, (double)nobs - 2);
      pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

    }/*THEN*/

  }/*FOR*/

  return statistic;

}/*UT_GAUSTESTS*/
示例#2
0
文件: mi.matrix.c 项目: cran/bnlearn
/* compute all the pairwise mutual information coefficients between the variables. */
void mi_matrix(double *mim, void **columns, int dim, int *nlevels, int *num,
    void *cond, int *clevels, double *means, double *sse, int *est) {

int i = 0, j = 0;

  switch(*est) {

    case DISCRETE_MAXIMUM_LIKELIHOOD:

      if (!cond) {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_chisqtest(((int **)columns)[i], nlevels[i],
                   ((int **)columns)[j], nlevels[j], *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*THEN*/
      else {

        for (i = 0; i < dim; i++) {

          for (j = i + 1; j < dim; j++) {

            mim[UPTRI3(i + 1, j + 1, dim)] =
              c_cchisqtest(((int **)columns)[i], nlevels[i],
                    ((int **)columns)[j], nlevels[j],
                    (int *)cond, *clevels, *num, NULL, MI, FALSE);

          }/*FOR*/

        }/*FOR*/

      }/*ELSE*/

      break;

    case GAUSSIAN_MAXIMUM_LIKELIHOOD:

      for (i = 0; i < dim; i++) {

        for (j = i + 1; j < dim; j++) {

          mim[UPTRI3(i + 1, j + 1, dim)] = cor_mi_trans(
            c_fast_cor(((double **)columns)[i], ((double **)columns)[j], *num,
              means[i], means[j], sse[i], sse[j]));

        }/*FOR*/

      }/*FOR*/

    break;

  }/*SWITCH*/

}/*MI_MATRIX*/
示例#3
0
/* conditional linear Gaussian mutual information test. */
static double ut_micg(SEXP xx, SEXP yy, int nobs, int ntests, double *pvalue,
  double *df) {

int i = 0, xtype = 0, ytype = TYPEOF(yy), llx = 0, lly = 0;
double xm = 0, xsd = 0, ym = 0, ysd = 0, statistic = 0;
void *xptr = NULL, *yptr = NULL;
SEXP xdata;

  if (ytype == INTSXP) {

    /* cache the number of levels. */
    lly = NLEVELS(yy);
    yptr = INTEGER(yy);

  }/*THEN*/
  else {

    /* cache mean and variance. */
    yptr = REAL(yy);
    ym = c_mean(yptr, nobs);
    ysd = c_sse(yptr, ym, nobs);

  }/*ELSE*/

  for (i = 0; i < ntests; i++) {

    xdata = VECTOR_ELT(xx, i);
    xtype = TYPEOF(xdata);

    if ((ytype == INTSXP) && (xtype == INTSXP)) {

      /* if both nodes are discrete, the test reverts back to a discrete
       * mutual information test. */
      xptr = INTEGER(xdata);
      llx = NLEVELS(xdata);
      DISCRETE_SWAP_X();
      statistic = 2 * nobs * c_chisqtest(xptr, llx, yptr, lly, nobs, df, MI);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == REALSXP)) {

      /* if both nodes are continuous, the test reverts back to a Gaussian
       * mutual information test. */
      xptr = REAL(xdata);
      xm = c_mean(xptr, nobs);
      xsd = c_sse(xptr, xm, nobs);
      statistic = c_fast_cor(xptr, yptr, nobs, xm, ym, xsd, ysd);
      *df = 1;
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else {

      if (xtype == INTSXP) {

        xptr = INTEGER(xdata);
        llx = NLEVELS(xdata);
        ysd = sqrt(ysd / (nobs - 1));
        statistic = 2 * nobs * c_micg(yptr, ym, ysd, xptr, llx, nobs);
        *df = llx - 1;
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else {

        xptr = REAL(xdata);
        xm = c_mean(xptr, nobs);
        xsd = sqrt(c_sse(xptr, xm, nobs) / (nobs - 1));
        statistic = 2 * nobs * c_micg(xptr, xm, xsd, yptr, lly, nobs);
        *df = lly - 1;
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*ELSE*/

    }/*THEN*/

  }/*FOR*/

  return statistic;

}/*UT_MICG*/
示例#4
0
/* parametric tests for Gaussian variables. */
static double ct_gaustests(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests,
    double *pvalue, double *df, test_e test) {

int i = 0, nsx = length(zz), ncols = nsx + 2;
double transform = 0, **column = NULL, *mean = NULL, statistic = 0, lambda = 0;
double *u = NULL, *d = NULL, *vt = NULL, *cov = NULL, *basecov = 0;

  /* compute the degrees of freedom for correlation and mutual information. */
  if (test == COR)
    *df = nobs - ncols;
  else if ((test == MI_G) || (test == MI_G_SH))
    *df = 1;

  if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - ncols < 2))) {

    /* if there are not enough degrees of freedom, return independence. */
    warning("trying to do a conditional independence test with zero degrees of freedom.");

    *df = 0;
    statistic = 0;
    for (i = 0; i < ntests; i++)
      pvalue[i] = 1;

    return statistic;

  }/*THEN*/

  GAUSSIAN_CACHE();

  if (ntests > 1) {

    /* allocate and compute mean values and the covariance matrix. */
    mean = Calloc1D(ncols, sizeof(double));
    c_meanvec(column, mean, nobs, ncols, 1);
    c_covmat(column, mean, ncols, nobs, cov, 1);
    memcpy(basecov, cov, ncols * ncols * sizeof(double));

    for (i = 0; i < ntests; i++) {

      GAUSSIAN_PCOR_CACHE();

      if (test == COR) {

        COMPUTE_PCOR();
        transform = cor_t_trans(statistic, *df);
        pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == MI_G) {

        COMPUTE_PCOR();
        statistic = 2 * nobs * cor_mi_trans(statistic);
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == MI_G_SH) {

        lambda = covmat_lambda(column, mean, cov, nobs, ncols);
        covmat_shrink(cov, ncols, lambda);
        COMPUTE_PCOR();
        statistic = 2 * nobs * cor_mi_trans(statistic);
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == ZF) {

        COMPUTE_PCOR();
        statistic = cor_zf_trans(statistic, (double)nobs - ncols);
        pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

      }/*THEN*/

    }/*FOR*/

  }/*THEN*/
  else {

    GAUSSIAN_PCOR_NOCACHE();

    if (test == COR) {

      COMPUTE_PCOR();
      transform = cor_t_trans(statistic, *df);
      pvalue[0] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G) {

      COMPUTE_PCOR();
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[0] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G_SH) {

      lambda = covmat_lambda(column, mean, cov, nobs, ncols);
      covmat_shrink(cov, ncols, lambda);
      COMPUTE_PCOR();
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[0] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == ZF) {

      COMPUTE_PCOR();
      statistic = cor_zf_trans(statistic, (double)nobs - ncols);
      pvalue[0] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

    }/*THEN*/

  }/*ELSE*/

  GAUSSIAN_FREE();

  Free1D(mean);
  Free1D(column);

  return statistic;

}/*CT_GAUSTESTS*/