コード例 #1
0
/* C-level wrapper around the dgesdd() F77 routine. Note that the input
 * matrix A is overwritten by dgesdd(), so it's sensible to have a
 * backup copy in case it's needed later. */
void c_svd(double *A, double *U, double *D, double *V, int *nrows,
    int *ncols, int *mindim, int strict, int *errcode) {

int lwork = -1, *iwork = NULL;
char jobz = 'A';
double tmp = 0, *work = NULL;

  iwork = Calloc1D(8 * (*mindim), sizeof(int));

  /* ask for the optimal size of the work array. */
  F77_CALL(dgesdd)(&jobz, nrows, ncols, A, nrows, D, U, nrows,
                   V, mindim, &tmp, &lwork, iwork, errcode);

  lwork = (int)tmp;
  work = Calloc1D(lwork, sizeof(double));

  /* actual call */
  F77_NAME(dgesdd)(&jobz, nrows, ncols, A, nrows, D, U, nrows,
                   V, mindim, work, &lwork, iwork, errcode);

  Free1D(work);
  Free1D(iwork);

  if (*errcode && strict)
    error("an error (%d) occurred in the call to dgesdd().\n", *errcode);

}/*C_SVD*/
コード例 #2
0
/* helper function to allocate U, D and Vt for SVD. */
void c_udvt(double **u, double **d, double **vt, int ncols) {

  *u = Calloc1D(ncols * ncols, sizeof(double));
  *d = Calloc1D(ncols, sizeof(double));
  *vt = Calloc1D(ncols * ncols, sizeof(double));

}/*C_UDVT*/
コード例 #3
0
SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) {

int k = 0, from = 0, to = 0, nrows = length(arcs) / 2, dims = length(nodes);
int *a = NULL, *coords = NULL, *poset = NULL, *path = NULL, *scratch = NULL;
double *w = NULL;
SEXP weights2, amat, try, acyclic;

  /* allocate and initialize the adjacency matrix. */
  PROTECT(amat = allocMatrix(INTSXP, dims, dims));
  a = INTEGER(amat);
  memset(a, '\0', sizeof(int) * dims * dims);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  /* duplicate the weights to preserve the original ones. */
  PROTECT(weights2 = duplicate(weights));
  w = REAL(weights2);

  /* sort the strength coefficients. */
  poset = Calloc1D(nrows, sizeof(int));
  for (k = 0; k < nrows; k++)
    poset[k] = k;
  R_qsort_I(w, poset, 1, nrows);

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(dims, sizeof(int));
  scratch = Calloc1D(dims, sizeof(int));

  /* iterate over the arcs in reverse order wrt their strength coefficients. */
  for (k = 0; k < nrows; k++) {

    from = coords[poset[k]] - 1;
    to = coords[poset[k] + nrows] - 1;

    /* add an arc only if it does not introduce cycles. */
    if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, path, scratch, FALSE))
      a[CMC(from, to, dims)] = 1;
    else
      warning("arc %s -> %s would introduce cycles in the graph, ignoring.",
        NODE(from), NODE(to));

  }/*FOR*/

  /* convert the adjacency matrix back to an arc set and return it. */
  acyclic = amat2arcs(amat, nodes);

  Free1D(path);
  Free1D(scratch);
  Free1D(poset);

  UNPROTECT(3);

  return acyclic;

}/*SMART_NETWORK_AVERAGING*/
コード例 #4
0
/* C-level function to perform OLS via QR decomposition. */
void c_qr_ols (double *qr, double *y, int nrow, int ncol, double *fitted,
    long double *sd) {

int i = 0, job = 1, rank = 0, info = 0, *pivot = NULL;
double tol = MACHINE_TOL, *qraux = NULL, *work = NULL;

  /* safety check for sample size = 1. */
  if (nrow == 1) {

    *sd = 0;
    *fitted = *y;

    return;

  }/*THEN*/

  /* allocate the working space. */
  qraux = Calloc1D(ncol, sizeof(double));
  work = Calloc1D(2 * ncol, sizeof(double));
  pivot = Calloc1D(ncol, sizeof(int));
  for (i = 0; i < ncol; i++)
    pivot[i] = i + 1;

  /* perform the QR decomposition. */
  F77_CALL(dqrdc2)(qr, &nrow, &nrow, &ncol, &tol, &rank, qraux, pivot, work);

  /* operate on a backup copy of the response variable. */
  memcpy(fitted, y, nrow * sizeof(double));

  /* compute the fitted values. */
  /*       dqrsl( x,  ldx,   n,     k,     qraux, y,      qy,     qty, */
  F77_CALL(dqrsl)(qr, &nrow, &nrow, &rank, qraux, fitted, NULL,   fitted,
  /*  b,      rsd,    xb,     job,  info) */
      NULL,   NULL,   fitted, &job, &info);

  if (info != 0)
    error("an error (%d) occurred in the call to dqrsl().\n", &info);

  /* compute the standard deviation of the residuals. */
  for (i = 0, *sd = 0; i < nrow; i++)
    *sd += (y[i] - fitted[i]) * (y[i] - fitted[i]);
  *sd = sqrt(*sd / (nrow - 1));

  Free1D(pivot);
  Free1D(work);
  Free1D(qraux);

}/*C_QR_OLS*/
コード例 #5
0
/* initialize a three-dimensional contingency table and the marginals. */
void fill_3d_table(int *xx, int *yy, int *zz, int ****n, int ***ni, int ***nj,
    int **nk, int llx, int lly, int llz, int num) {

int i = 0, j = 0, k = 0;

  *n = (int ***) Calloc3D(llz, llx, lly, sizeof(int));
  *ni = (int **) Calloc2D(llz, llx, sizeof(int));
  *nj = (int **) Calloc2D(llz, lly, sizeof(int));
  *nk = (int *) Calloc1D(llz, sizeof(int));

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    (*n)[zz[k] - 1][xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        (*ni)[k][i] += (*n)[k][i][j];
        (*nj)[k][j] += (*n)[k][i][j];
        (*nk)[k] += (*n)[k][i][j];

      }/*FOR*/

}/*FILL_3D_TABLE*/
コード例 #6
0
/* shrinked mutual information, to be used in C code. */
double c_shmi(int *xx, int llx, int *yy, int lly, int num) {

int i = 0, j = 0, k = 0;
double **n = NULL, *ni = NULL, *nj = NULL;
double lambda = 0, target = 1/(double)(llx * lly);
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = (double **) Calloc2D(llx, lly, sizeof(double));
  ni = Calloc1D(llx, sizeof(double));
  nj = Calloc1D(lly, sizeof(double));

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1]++;

  /* estimate the optimal lambda for the data. */
  mi_lambda((double *)n, &lambda, target, num, llx, lly, 0);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
        n[i][j] = lambda * target + (1 - lambda) * n[i][j] / num;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

    ni[i] += n[i][j];
    nj[j] += n[i][j];

  }/*FOR*/

  /* compute the mutual information from the joint and marginal frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      if (n[i][j] != 0)
        res += n[i][j] * log(n[i][j] / (ni[i] * nj[j]));

  Free1D(ni);
  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*C_SHMI*/
コード例 #7
0
/* get the number of parameters of the whole network (mixed case, also handles
 * discrete and Gaussian networks). */
SEXP nparams_cgnet(SEXP graph, SEXP data, SEXP debug) {

int i = 0, j = 0, nnodes = 0, debuglevel = isTRUE(debug);
int *nlevels = NULL, *index = NULL, ngp = 0;
double nconfig = 0, node_params = 0, all_params = 0;
SEXP nodes = R_NilValue, node_data, parents, temp;

  /* get nodes' number and data. */
  node_data = getListElement(graph, "nodes");
  nnodes = length(node_data);
  nodes = getAttrib(node_data, R_NamesSymbol);
  /* cache the number of levels of each variables (zero = continuous). */
  nlevels = Calloc1D(nnodes, sizeof(int));
  for (i = 0; i < nnodes; i++) {

    temp = VECTOR_ELT(data, i);
    if (TYPEOF(temp) == INTSXP)
      nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  for (i = 0; i < nnodes; i++) {

    /* extract the parents of the node and match them. */
    parents = getListElement(VECTOR_ELT(node_data, i), "parents");
    PROTECT(temp = match(nodes, parents, 0));
    index = INTEGER(temp);

    /* compute the number of regressors and of configurations. */
    for (j = 0, ngp = 0, nconfig = 1; j < length(parents); j++) {

      if (nlevels[index[j] - 1] == 0)
        ngp++;
      else
        nconfig *= nlevels[index[j] - 1];

    }/*FOR*/
    /* compute the overall number of parameters as regressors plus intercept
     * times configurations. */
    node_params = nconfig * (nlevels[i] == 0 ? ngp + 1 : nlevels[i] - 1);

    if (debuglevel > 0)
      Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params);

    /* update the return value. */
    all_params += node_params;

    UNPROTECT(1);

  }/*FOR*/

  Free1D(nlevels);

  return ScalarReal(all_params);

}/*NPARAMS_CGNET*/
コード例 #8
0
/* initialize a one-dimensional contingency table. */
void fill_1d_table(int *xx, int **n, int llx, int num) {

int i = 0;

  *n = Calloc1D(llx, sizeof(int));

  for (i = 0; i < num; i++)
    (*n)[xx[i] - 1]++;

}/*FILL_1D_TABLE*/
コード例 #9
0
/* C-level function to compute Moore-Penrose Generalized Inverse of a square matrix. */
void c_ginv(double *covariance, int ncols, double *mpinv) {

int i = 0, j = 0, errcode = 0;
double *u = NULL, *d = NULL, *vt = NULL, *backup = NULL;
double sv_tol = 0, zero = 0, one = 1;
char transa = 'N', transb = 'N';

  c_udvt(&u, &d, &vt, ncols);

  if (covariance != mpinv) {

    backup = Calloc1D(ncols * ncols, sizeof(double));
    memcpy(backup, covariance, ncols * ncols * sizeof(double));

  }/*THEN*/

  /* compute the SVD decomposition. */
  c_svd(covariance, u, d, vt, &ncols, &ncols, &ncols, FALSE, &errcode);

  /* if SVD fails, catch the error code and free all buffers. */
  if (errcode == 0) {

    /* set the threshold for the singular values as in corpcor. */
    sv_tol = ncols * d[0] * MACHINE_TOL * MACHINE_TOL;

    /* the first multiplication, U * D^{-1} is easy. */
    for (i = 0; i < ncols; i++)
      for (j = 0; j < ncols; j++)
        u[CMC(i, j, ncols)] = u[CMC(i, j, ncols)] * ((d[j] > sv_tol) ? 1/d[j] : 0);

    /* the second one, (U * D^{-1}) * Vt  is a real matrix multiplication. */
    F77_CALL(dgemm)(&transa, &transb, &ncols, &ncols, &ncols, &one, u,
      &ncols, vt, &ncols, &zero, mpinv, &ncols);

  }/*THEN*/

  if (covariance != mpinv) {

    memcpy(covariance, backup, ncols * ncols * sizeof(double));
    Free1D(backup);

  }/*THEN*/

  Free1D(u);
  Free1D(d);
  Free1D(vt);

  if (errcode)
    error("an error (%d) occurred in the call to c_ginv().\n", errcode);

}/*C_GINV*/
コード例 #10
0
ファイル: discrete.loglikelihood.c プロジェクト: cran/bnlearn
double cdlik(SEXP x, SEXP y, double *nparams) {

int i = 0, j = 0, k = 0;
int **n = NULL, *nj = NULL;
int llx = NLEVELS(x), lly = NLEVELS(y), num = length(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = (int **) Calloc2D(llx, lly, sizeof(int));
  nj = Calloc1D(lly, sizeof(int));

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      nj[j] += n[i][j];

  /* compute the conditional entropy from the joint and marginal
       frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      if (n[i][j] != 0)
        res += (double)n[i][j] * log((double)n[i][j] / (double)nj[j]);

  /* we may want to store the number of parameters. */
  if (nparams)
    *nparams = (llx - 1) * lly;

  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*CDLIK*/
コード例 #11
0
ファイル: dedup.c プロジェクト: cran/bnlearn
/* remove one variable in each highly-correlated pair. */
SEXP dedup (SEXP data, SEXP threshold, SEXP complete, SEXP debug) {

int i = 0, j = 0, k = 0, dropped = 0, nc = 0;
int debuglevel = isTRUE(debug);
double *mean = NULL, *sse = NULL, *xx = NULL, *yy = NULL;
double cur_mean[2], cur_sse[2];
double tol = MACHINE_TOL, t = NUM(threshold);
long double sum = 0;
SEXP result, colnames;
gdata dt = { 0 };

  /* extract the columns from the data frame. */
  dt = gdata_from_SEXP(data, 0);
  meta_init_flags(&(dt.m), 0, complete, R_NilValue);
  meta_copy_names(&(dt.m), 0, data);
  /* set up the vectors for the pairwise complete observations. */
  xx = Calloc1D(dt.m.nobs, sizeof(double));
  yy = Calloc1D(dt.m.nobs, sizeof(double));

  if (debuglevel > 0)
    Rprintf("* caching means and variances.\n");

  mean = Calloc1D(dt.m.ncols, sizeof(double));
  sse = Calloc1D(dt.m.ncols, sizeof(double));

  /* cache the mean and variance of complete variables. */
  for (j = 0; j < dt.m.ncols; j++) {

    if (!dt.m.flag[j].complete)
      continue;

    mean[j] = c_mean(dt.col[j], dt.m.nobs);
    sse[j] = c_sse(dt.col[j], mean[j], dt.m.nobs);

  }/*FOR*/

  /* main loop. */
  for (j = 0; j < dt.m.ncols - 1; j++) {

    /* skip variables already flagged for removal. */
    if (dt.m.flag[j].drop)
      continue;

    if (debuglevel > 0)
      Rprintf("* looking at %s with %d variables still to check.\n",
        dt.m.names[j], dt.m.ncols - (j + 1));

    for (k = j + 1; k < dt.m.ncols; k++) {

      /* skip variables already flagged for removal. */
      if (dt.m.flag[k].drop)
        continue;

      if (dt.m.flag[j].complete && dt.m.flag[k].complete) {

        /* use the cached means and variances. */
        cur_mean[0] = mean[j];
        cur_mean[1] = mean[k];
        cur_sse[0] = sse[j];
        cur_sse[1] = sse[k];

        /* compute the covariance. */
        for (i = 0, sum = 0; i < dt.m.nobs; i++)
          sum += (dt.col[j][i] - cur_mean[0]) * (dt.col[k][i] - cur_mean[1]);

      }/*THEN*/
      else {

        for (i = 0, nc = 0; i < dt.m.nobs; i++) {

          if (ISNAN(dt.col[j][i]) || ISNAN(dt.col[k][i]))
            continue;

          xx[nc] = dt.col[j][i];
          yy[nc++] = dt.col[k][i];

        }/*FOR*/


        /* if there are no complete observations, take the variables to be
         * independent. */
        if (nc == 0)
          continue;

        cur_mean[0] = c_mean(xx, nc);
        cur_mean[1] = c_mean(yy, nc);
        cur_sse[0] = c_sse(xx, cur_mean[0], nc);
        cur_sse[1] = c_sse(yy, cur_mean[1], nc);

        /* compute the covariance. */
        for (i = 0, sum = 0; i < nc; i++)
          sum += (xx[i] - cur_mean[0]) * (yy[i] - cur_mean[1]);

      }/*ELSE*/

      /* safety check against "divide by zero" errors. */
      if ((cur_sse[0] < tol) || (cur_sse[1] < tol))
        sum = 0;
      else
        sum /= sqrt(cur_sse[0] * cur_sse[1]);

      /* test the correlation against the threshold. */
      if (fabsl(sum) > t) {

        if (debuglevel > 0)
          Rprintf("%s is collinear with %s, dropping %s with COR = %.4Lf\n",
            dt.m.names[j], dt.m.names[k], dt.m.names[k], sum);

        /* flag the variable for removal. */
        dt.m.flag[k].drop = TRUE;
        dropped++;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* set up the return value. */
  PROTECT(result = allocVector(VECSXP, dt.m.ncols - dropped));
  PROTECT(colnames = allocVector(STRSXP, dt.m.ncols - dropped));

  for (j = 0, k = 0; j < dt.m.ncols; j++)
    if (!dt.m.flag[j].drop) {

      SET_STRING_ELT(colnames, k, mkChar(dt.m.names[j]));
      SET_VECTOR_ELT(result, k++, VECTOR_ELT(data, j));

    }/*THEN*/

  setAttrib(result, R_NamesSymbol, colnames);

  /* make it a data frame. */
  minimal_data_frame(result);

  Free1D(mean);
  Free1D(sse);
  Free1D(xx);
  Free1D(yy);
  FreeGDT(dt, FALSE);

  UNPROTECT(2);

  return result;

}/*DEDUP*/
コード例 #12
0
ファイル: mi.matrix.c プロジェクト: cran/bnlearn
/* set the directions of the arcs in a tree given the root node. */
SEXP tree_directions(SEXP arcs, SEXP nodes, SEXP root, SEXP debug) {

int i = 0, j = 0, d = 0, traversed = 1;
int narcs = length(arcs)/2, nnodes = length(nodes);
int *a = NULL, *depth = 0, debuglevel = isTRUE(debug);
SEXP try, try2, result;

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  a = INTEGER(try);

  /* match the root node. */
  PROTECT(try2 = match(nodes, root, 0));

  /* allocate and initialize the statust vector. */
  depth = Calloc1D(nnodes, sizeof(int));
  depth[INT(try2) - 1] = 1;

  if (debuglevel > 0)
    Rprintf("> root node (depth 1) is %s.\n", NODE(INT(try2) - 1));

  for (d = 1; d <= nnodes; d++) {

    if (debuglevel > 0)
      Rprintf("> considering nodes at depth %d.\n", d + 1);

    for (i = 0; i < narcs; i++) {

      for (j = 0; j < nnodes; j++) {

        /* disregard nodes at the wrong depth. */
        if (depth[j] != d)
          continue;

        if ((a[i + narcs] == (j + 1)) && (depth[a[i] - 1] == 0)) {

          if (debuglevel > 0)
            Rprintf("  * found node %s.\n", NODE(a[i] - 1));

          /* save the depth at which the node was found. */
          depth[a[i] - 1] = d + 1;

          /* update the counter of the traversed nodes. */
          traversed++;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

    /* check whether all nodes have been traversed. */
    if (traversed == nnodes)
      break;

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs/2, 2));

  for (i = 0, j = 0; i < narcs; i++) {

    if (depth[a[i] - 1] < depth[a[i + narcs] - 1]) {

      SET_STRING_ELT(result, j, STRING_ELT(arcs, i));
      SET_STRING_ELT(result, j + narcs/2, STRING_ELT(arcs, i + narcs));
      j++;

    }/*THEN*/

  }/*FOR*/

  UNPROTECT(3);

  Free1D(depth);

  return result;

}/*TREE_DIRECTIONS*/
コード例 #13
0
ファイル: mi.matrix.c プロジェクト: cran/bnlearn
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncol = length(data);
int num = length(VECTOR_ELT(data, 0)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, clevels = 0, *est = INTEGER(estimator), *depth = NULL;
int *wl = NULL, *bl = NULL, *poset = NULL, debuglevel = isTRUE(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL, *means = NULL, *sse = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = NLEVELS(conditional);

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(double));
  include = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(short int));

  /* compute the pairwise mutual information coefficients. */
  if (debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncol, nlevels, &num, cond, &clevels, means, sse, est);

  LIST_MUTUAL_INFORMATION_COEFS();

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (length(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = length(wlist);

    for (i = 0; i < nwl; i++) {

      if (debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (length(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = length(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(int));
  for (i = 0; i < UPTRI3_MATRIX(ncol); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncol));

  depth = Calloc1D(ncol, sizeof(int));

  for (i = UPTRI3_MATRIX(ncol) - 1; i >= 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncol, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncol - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, depth, debug_coord[0], debug_coord[1], ncol,
          nodes, FALSE)) {

      if (debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (length(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncol - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncol - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncol - 1));

  Free1D(depth);
  Free1D(mim);
  Free1D(include);
  Free1D(poset);
  Free1D(columns);
  if (nlevels)
    Free1D(nlevels);
  if (means)
    Free1D(means);
  if (sse)
    Free1D(sse);

  return arcs;

}/*CHOW_LIU*/
コード例 #14
0
ファイル: mi.matrix.c プロジェクト: cran/bnlearn
/* ARACNE structure learning algorithm. */
SEXP aracne(SEXP data, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP debug) {

int i = 0, j = 0, k = 0, coord = 0, ncol = length(data);
int num = length(VECTOR_ELT(data, i)), narcs = ncol * (ncol - 1) / 2;
int *nlevels = NULL, *est = INTEGER(estimator), *wl = NULL, *bl = NULL;
int debuglevel = isTRUE(debug);
void **columns = NULL;
short int *exclude = NULL;
double *mim = NULL, *means = NULL, *sse = NULL;
SEXP arcs, nodes, wlist, blist;

  PROTECT(nodes = getAttrib(data, R_NamesSymbol));

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* allocate the mutual information matrix and the status vector. */
  mim = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(double));
  exclude = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(short int));

  /* compute the pairwise mutual information coefficients. */
  if (debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncol, nlevels, &num, NULL, NULL, means, sse, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* compare all the triplets. */
  for (i = 0; i < ncol; i++) {

    for (j = i + 1; j < ncol; j++) {

      for (k = 0; k < ncol; k++) {

        if ((k == i) || (k == j))
          continue;

        /* cache the UPTRI3 coordinates of the arc. */
        coord = UPTRI3(i + 1, j + 1, ncol);

        /* if MI(X, Y) < min(MI(X, Z), MI(Z, Y)) drop arc X - Y. */
        if ((mim[coord] < mim[UPTRI3(i + 1, k + 1, ncol)]) &&
            (mim[coord] < mim[UPTRI3(j + 1, k + 1, ncol)])) {

          if (debuglevel > 0) {

            Rprintf("* dropping arc %s - %s because of %s, %lf < min(%lf, %lf)\n",
              NODE(i), NODE(j), NODE(k), mim[UPTRI3(i + 1, j + 1, ncol)],
              mim[UPTRI3(i + 1, k + 1, ncol)], mim[UPTRI3(j + 1, k + 1, ncol)]);

          }/*THEN*/

          /* update the status vector. */
          exclude[coord] = 1;
          /* decrement the number of arcs. */
          narcs--;

          break;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* add back whitelisted arcs. */
  if ((!isNull(whitelist)) && (length(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);

    for (i = 0; i < length(wlist); i++) {

      if (debuglevel > 0) {

        Rprintf("* adding back whitelisted arcs.\n");

        if (exclude[wl[i]] == 1) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + length(wlist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + length(wlist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[wl[i]] == 1)
        narcs++;
      /* include the arc in the graph. */
      exclude[wl[i]] = 0;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* remove blacklisted arcs. */
  if ((!isNull(blacklist)) && (length(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);

    for (i = 0; i < length(blist); i++) {

      if (debuglevel > 0) {

        Rprintf("* removing blacklisted arcs.\n");

        if (exclude[bl[i]] == 0) {

          Rprintf("  > arc %s - %s has been dropped from the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + length(blist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was not present in the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + length(blist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[bl[i]] == 0)
        narcs--;
      /* remove the arc from the graph. */
      exclude[bl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  CONVERT_TO_ARC_SET(exclude, 1, 2 * narcs);

  Free1D(mim);
  Free1D(exclude);
  Free1D(columns);
  if (nlevels)
    Free1D(nlevels);
  if (means)
    Free1D(means);
  if (sse)
    Free1D(sse);

  UNPROTECT(1);

  return arcs;

}/*ARACNE*/
コード例 #15
0
ファイル: pdag2dag.c プロジェクト: cran/bnlearn
/* return the skeleton of a DAG/PDAG. */
SEXP dag2ug(SEXP bn, SEXP moral, SEXP debug) {

int i = 0, j = 0, k = 0, nnodes = 0, narcs = 0, row = 0;
int debuglevel = isTRUE(debug), *moralize = LOGICAL(moral);
int *nparents = NULL, *nnbr = NULL;
SEXP node_data, current, nodes, result, temp;

  /* get the nodes' data. */
  node_data = getListElement(bn, "nodes");
  nnodes = length(node_data);
  PROTECT(nodes = getAttrib(node_data, R_NamesSymbol));

  /* allocate and initialize parents' and neighbours' counters. */
  nnbr = Calloc1D(nnodes, sizeof(int));
  if (*moralize > 0)
    nparents = Calloc1D(nnodes, sizeof(int));

  /* first pass: count neighbours, parents and resulting arcs. */
  for (i = 0; i < nnodes; i++) {

    /* get the number of neighbours.  */
    current = VECTOR_ELT(node_data, i);
    nnbr[i] = length(getListElement(current, "nbr"));

    /* update the number of arcs to be returned. */
    if (*moralize > 0) {

      /* get also the number of parents, needed to account for the arcs added
       * for their moralization. */
      nparents[i] = length(getListElement(current, "parents"));
      narcs += nnbr[i] + nparents[i] * (nparents[i] - 1);

    }/*THEN*/
    else {

      narcs += nnbr[i];

    }/*ELSE*/

    if (debuglevel > 0)  {

      if (*moralize > 0) {

        Rprintf("* scanning node %s, found %d neighbours and %d parents.\n",
          NODE(i), nnbr[i], nparents[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n",
          nnbr[i] + nparents[i] * (nparents[i] - 1), narcs);

      }/*THEN*/
      else {

        Rprintf("* scanning node %s, found %d neighbours.\n", NODE(i), nnbr[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n", nnbr[i], narcs);

      }/*ELSE*/

    }/*THEN*/

  }/*FOR*/

  /* allocate the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs, 2));
  /* allocate and set the column names. */
  setDimNames(result, R_NilValue, mkStringVec(2, "from", "to"));

  /* second pass: fill the return value. */
  for (i = 0; i < nnodes; i++) {

    /* get to the current node. */
    current = VECTOR_ELT(node_data, i);
    /* get the neighbours. */
    temp = getListElement(current, "nbr");

    for (j = 0; j < nnbr[i]; j++) {

      SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(nodes, i));
      SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
      row++;

    }/*FOR*/

    /* if we are not creating a moral graph we are done with this node. */
    if (*moralize == 0)
      continue;

    /* get the parents. */
    temp = getListElement(current, "parents");

    for (j = 0; j < nparents[i]; j++) {

      for (k = j+1; k < nparents[i]; k++) {

        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, k));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
        row++;
        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, j));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, k));
        row++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  Free1D(nnbr);

  if (*moralize > 0) {

    /* be really sure not to return duplicate arcs in moral graphs when
     * shielded parents are present (the "shielding" nodes are counted
     * twice). */
    result = c_unique_arcs(result, nodes, FALSE);

    Free1D(nparents);

  }/*THEN*/

  UNPROTECT(2);

  return result;

}/*DAG2UG*/
コード例 #16
0
ファイル: hc.cache.lookup.c プロジェクト: cran/bnlearn
SEXP score_cache_fill(SEXP nodes, SEXP data, SEXP network, SEXP score,
    SEXP extra, SEXP reference, SEXP equivalence, SEXP decomposability,
    SEXP updated, SEXP amat, SEXP cache, SEXP blmat, SEXP debug) {

int *colsum = NULL, nnodes = length(nodes), lupd = length(updated);
int *a = NULL, *upd = NULL, *b = NULL, debuglevel = isTRUE(debug);
int i = 0, j = 0, k = 0;
double *cache_value = NULL;
SEXP arc, delta, op, temp;

  /* save a pointer to the adjacency matrix, the blacklist and the
   * updated nodes. */
  a = INTEGER(amat);
  b = INTEGER(blmat);
  upd = INTEGER(updated);

  /* if there are no nodes to update, return. */
  if (lupd == 0) return cache;

  /* set up row and column total to check for score equivalence;
   * zero means no parent nodes. */
  if (isTRUE(equivalence)) {

    colsum = Calloc1D(nnodes, sizeof(int));

    for (i = 0; i < nnodes; i++)
      for (j = 0; j < nnodes; j++)
        colsum[j] += a[CMC(i, j, nnodes)];

  }/*THEN*/

  /* allocate and initialize the cache. */
  cache_value = REAL(cache);

  /* allocate a two-slot character vector. */
  PROTECT(arc = allocVector(STRSXP, 2));

  /* allocate and initialize the fake score delta. */
  PROTECT(delta = ScalarReal(0));

  /* allocate and initialize the score.delta() operator. */
  PROTECT(op = mkString("set"));

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

       /* incident nodes must be different from each other. */
       if (i == j) continue;

       /* if only one or two nodes' caches need updating, skip the rest. */
       for (k = 0; k < lupd; k++)
         if (upd[k] == j)
           goto there;

       continue;

there:

       /* no need to compute the score delta for blacklisted arcs. */
       if (b[CMC(i, j, nnodes)] == 1)
         continue;

       /* use score equivalence if possible to check only one orientation. */
       if (isTRUE(equivalence)) {

         /* if the following conditions are met, look up the score delta of
          * the reverse of the current arc:
          *   1) that score delta has already been computed.
          *   2) both incident nodes have no parent, so the arc is really
          *      score equivalent (no v-structures).
          *   3) the reversed arc has not been blacklisted, as the score delta
          *      is not computed in this case. */
         if ((i > j) && (colsum[i] + colsum[j] == 0) && (b[CMC(j, i, nnodes)] == 0)) {

           cache_value[CMC(i, j, nnodes)] = cache_value[CMC(j, i, nnodes)];
           continue;

         }/*THEN*/

       }/*THEN*/

       /* save the nodes incident on the arc. */
       SET_STRING_ELT(arc, 0, STRING_ELT(nodes, i));
       SET_STRING_ELT(arc, 1, STRING_ELT(nodes, j));

       /* if the arc is not present in the graph it should be added;
        * otherwise it should be removed. */
       if (a[CMC(i, j, nnodes)] == 0)
         SET_STRING_ELT(op, 0, mkChar("set"));
       else
         SET_STRING_ELT(op, 0, mkChar("drop"));

       /* checkpoint allocated memory. */
       /* evaluate the call to score.delta() for the arc. */
       PROTECT(temp = score_delta(arc, network, data, score, delta, reference,
         op, extra, decomposability));

       cache_value[CMC(i, j, nnodes)] = NUM(VECTOR_ELT(temp, 1));
       UNPROTECT(1);

       if (debuglevel > 0)
         Rprintf("* caching score delta for arc %s -> %s (%lf).\n",
           CHAR(STRING_ELT(nodes, i)), CHAR(STRING_ELT(nodes, j)),
            cache_value[CMC(i, j, nnodes)]);

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(3);

  if (isTRUE(equivalence))
    Free1D(colsum);

  return cache;

}/*HC_CACHE_FILL*/
コード例 #17
0
/* conditional posterior Dirichlet probability (covers BDe and K2 scores). */
double cdpost(SEXP x, SEXP y, SEXP iss, SEXP exp) {

int i = 0, j = 0, k = 0, imaginary = 0, num = length(x);
int llx = NLEVELS(x), lly = NLEVELS(y), p = llx * lly;
int *xx = INTEGER(x), *yy = INTEGER(y), **n = NULL, *nj = NULL;
double alpha = 0, res = 0;

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = p;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INT(iss);
    alpha = ((double) imaginary) / ((double) p);

  }/*ELSE*/

  /* initialize the contingency table. */
  n = (int **) Calloc2D(llx, lly, sizeof(int));
  nj = Calloc1D(lly, sizeof(int));

  /* compute the joint frequency of x and y. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++) {

      n[xx[i] - 1][yy[i] - 1]++;
      nj[yy[i] - 1]++;

    }/*FOR*/

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {

      if (i != e[k] - 1) {

        n[xx[i] - 1][yy[i] - 1]++;
        nj[yy[i] - 1]++;

      }/*THEN*/
      else {

        k++;

      }/*ELSE*/

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= length(exp);

  }/*ELSE*/

  /* compute the conditional posterior probability. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      res += lgammafn(n[i][j] + alpha) - lgammafn(alpha);
  for (j = 0; j < lly; j++)
    res += lgammafn((double)imaginary / lly) -
              lgammafn(nj[j] + (double)imaginary / lly);

  Free1D(nj);
  Free2D(n, llx);

  return res;

}/*CDPOST*/
コード例 #18
0
ファイル: ctest.c プロジェクト: stochasticresearch/bnlearn-r
/* parametric tests for Gaussian variables. */
static double ct_gaustests(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests,
    double *pvalue, double *df, test_e test) {

int i = 0, nsx = length(zz), ncols = nsx + 2;
double transform = 0, **column = NULL, *mean = NULL, statistic = 0, lambda = 0;
double *u = NULL, *d = NULL, *vt = NULL, *cov = NULL, *basecov = 0;

  /* compute the degrees of freedom for correlation and mutual information. */
  if (test == COR)
    *df = nobs - ncols;
  else if ((test == MI_G) || (test == MI_G_SH))
    *df = 1;

  if (((test == COR) && (*df < 1)) || ((test == ZF) && (nobs - ncols < 2))) {

    /* if there are not enough degrees of freedom, return independence. */
    warning("trying to do a conditional independence test with zero degrees of freedom.");

    *df = 0;
    statistic = 0;
    for (i = 0; i < ntests; i++)
      pvalue[i] = 1;

    return statistic;

  }/*THEN*/

  GAUSSIAN_CACHE();

  if (ntests > 1) {

    /* allocate and compute mean values and the covariance matrix. */
    mean = Calloc1D(ncols, sizeof(double));
    c_meanvec(column, mean, nobs, ncols, 1);
    c_covmat(column, mean, ncols, nobs, cov, 1);
    memcpy(basecov, cov, ncols * ncols * sizeof(double));

    for (i = 0; i < ntests; i++) {

      GAUSSIAN_PCOR_CACHE();

      if (test == COR) {

        COMPUTE_PCOR();
        transform = cor_t_trans(statistic, *df);
        pvalue[i] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == MI_G) {

        COMPUTE_PCOR();
        statistic = 2 * nobs * cor_mi_trans(statistic);
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == MI_G_SH) {

        lambda = covmat_lambda(column, mean, cov, nobs, ncols);
        covmat_shrink(cov, ncols, lambda);
        COMPUTE_PCOR();
        statistic = 2 * nobs * cor_mi_trans(statistic);
        pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

      }/*THEN*/
      else if (test == ZF) {

        COMPUTE_PCOR();
        statistic = cor_zf_trans(statistic, (double)nobs - ncols);
        pvalue[i] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

      }/*THEN*/

    }/*FOR*/

  }/*THEN*/
  else {

    GAUSSIAN_PCOR_NOCACHE();

    if (test == COR) {

      COMPUTE_PCOR();
      transform = cor_t_trans(statistic, *df);
      pvalue[0] = 2 * pt(fabs(transform), *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G) {

      COMPUTE_PCOR();
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[0] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == MI_G_SH) {

      lambda = covmat_lambda(column, mean, cov, nobs, ncols);
      covmat_shrink(cov, ncols, lambda);
      COMPUTE_PCOR();
      statistic = 2 * nobs * cor_mi_trans(statistic);
      pvalue[0] = pchisq(statistic, *df, FALSE, FALSE);

    }/*THEN*/
    else if (test == ZF) {

      COMPUTE_PCOR();
      statistic = cor_zf_trans(statistic, (double)nobs - ncols);
      pvalue[0] = 2 * pnorm(fabs(statistic), 0, 1, FALSE, FALSE);

    }/*THEN*/

  }/*ELSE*/

  GAUSSIAN_FREE();

  Free1D(mean);
  Free1D(column);

  return statistic;

}/*CT_GAUSTESTS*/
コード例 #19
0
/* shrinked conditional mutual information, to be used in C code. */
double c_shcmi(int *xx, int llx, int *yy, int lly, int *zz, int llz,
    int num, double *df) {

int i = 0, j = 0, k = 0;
double ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL;
double lambda = 0, target = 1/(double)(llx * lly * llz);
double res = 0;

  /* compute the degrees of freedom. */
  *df = (double)(llx - 1) * (double)(lly - 1) * (double)(llz);

  /* initialize the contingency table and the marginal frequencies. */
  n = (double ***) Calloc3D(llx, lly, llz, sizeof(double));
  ni = (double **) Calloc2D(llx, llz, sizeof(double));
  nj = (double **) Calloc2D(lly, llz, sizeof(double));
  nk = Calloc1D(llz, sizeof(double));

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++;

  /* estimate the optimal lambda for the data. */
  mi_lambda((double *)n, &lambda, target, num, llx, lly, llz);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++)
        n[i][j][k] = lambda * target + (1 - lambda) * n[i][j][k] / num;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        ni[i][k] += n[i][j][k];
        nj[j][k] += n[i][j][k];
        nk[k] += n[i][j][k];

      }/*FOR*/

  for (k = 0; k < llz; k++) {

    /* check each level of the conditioning variable to avoid (again)
     * "divide by zero" errors. */
    if (nk[k] == 0)
      continue;

    for (j = 0; j < lly; j++) {

      for (i = 0; i < llx; i++) {

        if (n[i][j][k] > 0)
          res += n[i][j][k] * log( (n[i][j][k] * nk[k]) / (ni[i][k] * nj[j][k]) );

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  Free1D(nk);
  Free2D(ni, llx);
  Free2D(nj, lly);
  Free3D(n, llx, lly);

  return res;

}/*C_SHCMI*/
コード例 #20
0
ファイル: ctest.c プロジェクト: stochasticresearch/bnlearn-r
/* conditional linear Gaussian mutual information test. */
static double ct_micg(SEXP xx, SEXP yy, SEXP zz, int nobs, int ntests,
    double *pvalue, double *df) {

int xtype = 0, ytype = TYPEOF(yy), *nlvls = NULL, llx = 0, lly = 0, llz = 0;
int ndp = 0, ngp = 0, nsx = length(zz), **dp = NULL, *dlvls = NULL, j = 0, k = 0;
int i = 0, *zptr = 0;
void *xptr = NULL, *yptr = NULL, **columns = NULL;
double **gp = NULL;
double statistic = 0;
SEXP xdata;

  if (ytype == INTSXP) {

    /* cache the number of levels. */
    lly = NLEVELS(yy);
    yptr = INTEGER(yy);

  }/*THEN*/
  else {

    yptr = REAL(yy);

  }/*ELSE*/

  /* extract the conditioning variables and cache their types. */
  columns = Calloc1D(nsx, sizeof(void *));
  nlvls = Calloc1D(nsx, sizeof(int));
  df2micg(zz, columns, nlvls, &ndp, &ngp);

  dp = Calloc1D(ndp + 1, sizeof(int *));
  gp = Calloc1D(ngp + 1, sizeof(double *));
  dlvls = Calloc1D(ndp + 1, sizeof(int));
  for (i = 0, j = 0, k = 0; i < nsx; i++)
    if (nlvls[i] > 0) {

      dlvls[1 + j] = nlvls[i];
      dp[1 + j++] = columns[i];

    }/*THEN*/
    else {

      gp[1 + k++] = columns[i];

    }/*ELSE*/

  /* allocate vector for the configurations of the discrete parents; or, if
   * there no discrete parents, for the means of the continuous parents. */
  if (ndp > 0) {

    zptr = Calloc1D(nobs, sizeof(int));
    c_fast_config(dp + 1, nobs, ndp, dlvls + 1, zptr, &llz, 1);

  }/*THEN*/

  for (i = 0; i < ntests; i++) {

    xdata = VECTOR_ELT(xx, i);
    xtype = TYPEOF(xdata);

    if (xtype == INTSXP) {

      xptr = INTEGER(xdata);
      llx = NLEVELS(xdata);

    }/*THEN*/
    else {

      xptr = REAL(xdata);

    }/*ELSE*/

    if ((ytype == INTSXP) && (xtype == INTSXP)) {

      if (ngp > 0) {

        /* need to reverse conditioning to actually compute the test. */
        statistic = 2 * nobs * nobs *
                      c_cmicg_unroll(xptr, llx, yptr, lly, zptr, llz,
                                 gp + 1, ngp, df, nobs);

      }/*THEN*/
      else {

        /* the test reverts back to a discrete mutual information test. */
        statistic = 2 * nobs * c_cchisqtest(xptr, llx, yptr, lly, zptr, llz,
                                 nobs, df, MI);

      }/*ELSE*/

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == REALSXP)) {

      gp[0] = xptr;
      statistic = 2 * nobs * c_cmicg(yptr, gp, ngp + 1, NULL, 0, zptr, llz,
                               dlvls, nobs);
      /* one regression coefficient for each conditioning level is added;
       * if all conditioning variables are continuous that's just one global
       * regression coefficient. */
      *df = (llz == 0) ? 1 : llz;

    }/*THEN*/
    else if ((ytype == INTSXP) && (xtype == REALSXP)) {

      dp[0] = yptr;
      dlvls[0] = lly;
      statistic = 2 * nobs * c_cmicg(xptr, gp + 1, ngp, dp, ndp + 1, zptr,
                               llz, dlvls, nobs);

      /* for each additional configuration of the discrete conditioning
       * variables plus the discrete yptr, one whole set of regression
       * coefficients (plus the intercept) is added. */
      *df = (lly - 1) * ((llz == 0) ? 1 : llz)  * (ngp + 1);

    }/*THEN*/
    else if ((ytype == REALSXP) && (xtype == INTSXP)) {

      dp[0] = xptr;
      dlvls[0] = llx;
      statistic = 2 * nobs * c_cmicg(yptr, gp + 1, ngp, dp, ndp + 1, zptr,
                               llz, dlvls, nobs);
      /* same as above, with xptr and yptr swapped. */
      *df = (llx - 1) * ((llz == 0) ? 1 : llz) * (ngp + 1);

    }/*ELSE*/

    pvalue[i] = pchisq(statistic, *df, FALSE, FALSE);

  }/*FOR*/

  Free1D(columns);
  Free1D(nlvls);
  Free1D(dlvls);
  Free1D(zptr);
  Free1D(dp);
  Free1D(gp);

  return statistic;

}/*CT_MICG*/
コード例 #21
0
ファイル: hc.cache.lookup.c プロジェクト: cran/bnlearn
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */
SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference,
    SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) {

int nnodes = length(nodes), i = 0, j = 0;
int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug);
int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL;
double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL;
double *mp = REAL(maxp), *np = REAL(nparents);
SEXP bestop;

  /* allocate and initialize the return value (use FALSE as a canary value). */
  PROTECT(bestop = allocVector(VECSXP, 3));
  setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to"));

  /* allocate and initialize a dummy FALSE object. */
  SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE));

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(nnodes, sizeof(int));
  scratch = Calloc1D(nnodes, sizeof(int));

  /* save pointers to the numeric/integer matrices. */
  cache_value = REAL(cache);
  ad = INTEGER(added);
  am = INTEGER(amat);
  w = INTEGER(wlmat);
  b = INTEGER(blmat);

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0; i < nnodes * nnodes; i++)
       counter += ad[i];

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to add one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (ad[CMC(i, j, nnodes)] == 0)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to add %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      /* this score delta is the best one at the moment, so add the arc if it
       * does not introduce cycles in the graph. */
      if (temp - max > tol) {

        if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not adding, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ adding %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "set", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes * nnodes; i++)
       counter += am[i] * (1 - w[i]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to remove one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* whitelisted arcs are not to be removed, ever. */
      if (w[CMC(i, j, nnodes)] == 1)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to remove %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (debuglevel > 0)
          Rprintf("    @ removing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "drop", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes; i++)
       for (j = 0; j < nnodes; j++)
         counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to reverse one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* don't reverse an arc if the one in the opposite direction is
       * blacklisted, ever. */
      if (b[CMC(j, i, nnodes)] == 1)
        continue;

      /* do not reverse an arc if that means violating the limit on the
       * maximum number of parents. */
      if (np[i] >= *mp)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)];
      /* nuke small values and negative zeroes. */
      if (fabs(temp) < tol) temp = 0;

      if (debuglevel > 0) {

        Rprintf("  > trying to reverse %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not reversing, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ reversing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "reverse", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;
        /* both nodes' reference scores must be updated. */
        update = 2;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* update the reference scores. */
  REAL(reference)[to] += cache_value[CMC(from, to, nnodes)];
  if (update == 2)
    REAL(reference)[from] += cache_value[CMC(to, from, nnodes)];

  Free1D(path);
  Free1D(scratch);

  UNPROTECT(1);

  return bestop;

}/*HC_OPT_STEP*/
コード例 #22
0
ファイル: hc.cache.lookup.c プロジェクト: cran/bnlearn
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents,
    SEXP maxp, SEXP nodes, SEXP convert) {

int i = 0, j = 0, narcs = 0, dims = length(nodes);
int *a = NULL, *coords = NULL;
double *mp = REAL(maxp), *np = NULL;
short int referenced = 0;
SEXP try, result = R_NilValue, result2;

  /* transform the arc set into an adjacency matrix, if it's not one already. */
  if (isInteger(arcs)) {

    if ((referenced = MAYBE_REFERENCED(arcs)))
      PROTECT(result = duplicate(arcs));

  }/*THEN*/
  else {

    PROTECT(result = arcs2amat(arcs, nodes));

  }/*ELSE*/

  /* dereference the adjacency matrix once and for all. */
  a = INTEGER(result);

  /* compute the number the parents of each node, unless provided. */
  if (nparents == R_NilValue) {

    np = Calloc1D(dims, sizeof(double));
    for (i = 0; i < dims; i++)
      for (j = 0; j < dims; j++)
        np[j] = a[CMC(i, j, dims)];

  }/*THEN*/
  else {

    np = REAL(nparents);

  }/*ELSE*/

  /* flip all the nondiagonal cells. */
  for (j = 0; j < dims; j++) {

    for (i = 0; i < dims; i++) {

      /* diagonal elements are always equal to zero, skip them. */
      if (i == j)
        continue;

      a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)];

    }/*FOR*/

  }/*FOR*/

  /* if an arc is present in the graph in one direction, you cannot add it in
   * the other direction (it would be a reversal); flip both in the adjacency
   * matrix. */
  for (j = 0; j < dims; j++)
    for (i = j + 1; i < dims; i++)
      a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)];

  /* if a node has already reached its maximum number parents, do not add
   * more arcs pointing to that node. */
  for (j = 0; j < dims; j++)
    if (np[j] >= *mp)
      memset(a + j * dims, '\0', dims * sizeof(int));

#define FLIP_FROM_LIST(list, value) \
  if (!isNull(list)) { \
    if (!isInteger(list)) { \
      PROTECT(try = match(nodes, list, 0)); \
      coords = INTEGER(try); \
      narcs = length(try)/2; \
      for (i = 0; i < narcs; i++)  \
        a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \
      UNPROTECT(1); \
    }/*THEN*/ \
    else { \
      coords = INTEGER(list); \
      for (i = 0; i < dims * dims; i ++) \
        if (coords[i] == 1) \
          a[i] = value; \
    }/*ELSE*/ \
  }/*THEN*/

  /* now the blacklist gets involved. */
  FLIP_FROM_LIST(blacklist, 0);
  /* and, last but not least, the whitelist gets involved. */
  FLIP_FROM_LIST(whitelist, 1);

  if (nparents == R_NilValue)
    Free1D(np);

  /* return either the adjacency matrix or the arc set. */
  if (isTRUE(convert)) {

    PROTECT(result2 = amat2arcs(result, nodes));

    if (referenced || !isInteger(arcs))
      UNPROTECT(2);
    else
      UNPROTECT(1);
    return result2;

  }/*THEN*/
  else {

    if (referenced || !isInteger(arcs))
      UNPROTECT(1);
    return result;

  }/*ELSE*/

}/*HC_TO_BE_ADDED*/
コード例 #23
0
ファイル: cextend.c プロジェクト: cran/bnlearn
/* construct a consistent DAG extension of a CPDAG. */
SEXP pdag_extension(SEXP arcs, SEXP nodes, SEXP debug) {

int i = 0, j = 0, k = 0, t = 0, nnodes = length(nodes);
int changed = 0, left = nnodes;
int *a = NULL, *nbr = NULL, debuglevel = isTRUE(debug);
short int *matched = NULL;
SEXP amat, result;

  /* build and dereference the adjacency matrix. */
  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* allocate and initialize the neighbours and matched vectors. */
  nbr = Calloc1D(nnodes, sizeof(int));
  matched = Calloc1D(nnodes, sizeof(short int));

  for (t = 0; t < nnodes; t++) {

    if (debuglevel > 0) {

      Rprintf("----------------------------------------------------------------\n");
      Rprintf("> performing pass %d.\n", t + 1);
      Rprintf("> candidate nodes: ");
        for (j = 0; j < nnodes; j++)
          if (matched[j] == 0)
            Rprintf("%s ", NODE(j));
      Rprintf("\n");

    }/*THEN*/

    for (i = 0; i < nnodes; i++) {

      /* if the node is already ok, skip it. */
      if (matched[i] != 0)
        continue;

      /* check whether the node is a sink (that is, whether is does not have
       * any child). */
      is_a_sink(a, i, &k, nnodes, nbr, matched);

      /* if the node is not a sink move on. */
      if (k == -1) {

        if (debuglevel > 0)
          Rprintf("  * node %s is not a sink.\n", NODE(i));

        continue;

      }/*THEN*/
      else {

        if (debuglevel > 0)
          Rprintf("  * node %s is a sink.\n", NODE(i));

      }/*ELSE*/

      if (!all_adjacent(a, i, k, nnodes, nbr)) {

        if (debuglevel > 0)
          Rprintf("  * not all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i));

        continue;

      }/*THEN*/
      else {

        if (debuglevel > 0) {

          if (k == 0)
            Rprintf("  * no node is linked to %s by an undirected arc.\n", NODE(i));
          else
            Rprintf("  * all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i));

        }/*THEN*/

      }/*ELSE*/

      /* the current node meets all the conditions, direct all the arcs towards it. */
      if (k == 0) {

        if (debuglevel > 0)
          Rprintf("  @ no undirected arc to direct towards %s.\n", NODE(i));

      }/*THEN*/
      else {

        for (j = 0; j < k; j++)
          a[CMC(i, nbr[j], nnodes)] = 0;

        if (debuglevel > 0)
          Rprintf("  @ directing all incident undirected arcs towards %s.\n", NODE(i));

      }/*ELSE*/

      /* set the changed flag. */
      changed = 1;

      /* exclude the node from later iterations. */
      matched[i] = 1;
      left--;

    }/*FOR*/

    /* if nothing changed in the last iteration or there are no more candidate
     * nodes, there is nothing else to do. */
    if ((changed == 0) || (left == 0))
      break;
    else
      changed = 0;

  }/*FOR*/

  /* build the new arc set from the adjacency matrix. */
  PROTECT(result = amat2arcs(amat, nodes));

  Free1D(nbr);
  Free1D(matched);
  UNPROTECT(2);

  return result;

}/*PDAG_EXTENSION*/
コード例 #24
0
/* posterior Dirichlet probability (covers BDe and K2 scores). */
double dpost(SEXP x, SEXP iss, SEXP exp) {

int i = 0, k = 0, num = length(x);
int llx = NLEVELS(x), *xx = INTEGER(x), *n = NULL, *imaginary = NULL;
double alpha = 0, res = 0;

  /* the correct vaules for the hyperparameters alpha are documented in
   * "Learning Bayesian Networks: The Combination of Knowledge and Statistical
   * Data" by Heckerman, Geiger & Chickering (1995), page 17. */

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = &llx;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INTEGER(iss);
    alpha = (double) *imaginary / (double) llx;

  }/*ELSE*/

  /* initialize the contingency table. */
  n = Calloc1D(llx, sizeof(int));

  /* compute the frequency table of x, disregarding experimental data. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++)
      n[xx[i] - 1]++;

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {
      if (i != e[k] - 1)
        n[xx[i] - 1]++;
      else
        k++;

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= length(exp);

  }/*ELSE*/

  /* compute the posterior probability. */
  for (i = 0; i < llx; i++)
    res += lgammafn(n[i] + alpha) - lgammafn(alpha);
  res += lgammafn((double)(*imaginary)) -
            lgammafn((double)(*imaginary + num));

  Free1D(n);

  return res;

}/*DPOST*/