Exemplo n.º 1
0
/* unconditional mutual information, to be used in C code. */
double c_mi(int *xx, int *llx, int *yy, int *lly, int *num) {

int i = 0, j = 0, k = 0;
int  **n = NULL, *ni = NULL, *nj = NULL;
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc2dcont(*llx, *lly);
  ni = alloc1dcont(*llx);
  nj = alloc1dcont(*lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++) {

    ni[i] += n[i][j];
    nj[j] += n[i][j];

  }/*FOR*/

  /* compute the mutual information from the joint and marginal frequencies. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      res += MI_PART(n[i][j], ni[i], nj[j], *num);

  return (res)/(*num);

}/*C_MI*/
SEXP dlik(SEXP x) {

int i = 0, k = 0;
int *n = NULL, *xx = INTEGER(x), llx = NLEVELS(x), num = LENGTH(x);
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table. */
  n = alloc1dcont(llx);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++) {

    n[xx[k] - 1]++;

  }/*FOR*/

  /* compute the entropy from the joint and marginal frequencies. */
  for (i = 0; i < llx; i++) {

    if (n[i] != 0)
      *res += (double)n[i] * log((double)n[i] / num);

  }/*FOR*/

  UNPROTECT(1);

  return result;

}/*DLIK*/
Exemplo n.º 3
0
/* compute the cached values fro all nodes. */
SEXP cache_structure(SEXP nodes, SEXP amat, SEXP debug) {

int i = 0, debuglevel = LOGICAL(debug)[0], length_nodes = LENGTH(nodes);
int *status = NULL, *a = INTEGER(amat);

  SEXP bn, temp;

  /* allocate the list and set its attributes.*/
  PROTECT(bn = allocVector(VECSXP, length_nodes));
  setAttrib(bn, R_NamesSymbol, nodes);

  /* allocate and intialize the status vector. */
  status = alloc1dcont(length_nodes);

  if (isTRUE(debug))
    Rprintf("* (re)building cached information about network structure.\n");

  /* populate the list with nodes' data. */
  for (i = 0; i < length_nodes; i++) {

    /* (re)initialize the status vector. */
    memset(status, '\0', sizeof(int) * length_nodes);

    temp = cache_node_structure(i, nodes, a, length_nodes, status, debuglevel);

    /* save the returned list. */
    SET_VECTOR_ELT(bn, i, temp);

  }/*FOR*/

  UNPROTECT(1);

  return bn;

}/*CACHE_STRUCTURE*/
Exemplo n.º 4
0
/* initialize a three-dimensional contingency table and the marginals. */
void fill_3d_table(int *xx, int *yy, int *zz, int ****n, int ***ni, int ***nj,
    int **nk, int llx, int lly, int llz, int num) {

int i = 0, j = 0, k = 0;

  *n = alloc3dcont(llz, llx, lly);
  *ni = alloc2dcont(llz, llx);
  *nj = alloc2dcont(llz, lly);
  *nk = alloc1dcont(llz);

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < num; k++)
    (*n)[zz[k] - 1][xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      for (k = 0; k < llz; k++) {

        (*ni)[k][i] += (*n)[k][i][j];
        (*nj)[k][j] += (*n)[k][i][j];
        (*nk)[k] += (*n)[k][i][j];

      }/*FOR*/

}/*FILL_3D_TABLE*/
Exemplo n.º 5
0
/* compute the cached values for a single node (C-friendly). */
SEXP c_cache_partial_structure(int target, SEXP nodes, SEXP amat, int *status, SEXP debug) {

int debuglevel = LOGICAL(debug)[0], length_nodes = LENGTH(nodes);
int *a = INTEGER(amat);

  /* allocate and initialize the status vector. */
  if (!(*status))
    status = alloc1dcont(length_nodes);

  /* return the corresponding part of the bn structure. */
  return cache_node_structure(target, nodes, a, length_nodes, status, debuglevel);

}/*C_CACHE_PARTIAL_STRUCTURE*/
Exemplo n.º 6
0
/* initialize a two-dimensional contingency table and the marginals. */
void fill_2d_table(int *xx, int *yy, int ***n, int **ni, int **nj, int llx,
    int lly, int num) {

int i = 0, j = 0, k = 0;

  *n = alloc2dcont(llx, lly);
  *ni = alloc1dcont(llx);
  *nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++)
    (*n)[xx[k] - 1][yy[k] - 1]++;

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

    (*ni)[i] += (*n)[i][j];
    (*nj)[j] += (*n)[i][j];

  }/*FOR*/

}/*FILL_2D_TABLE*/
Exemplo n.º 7
0
SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) {

int k = 0, from = 0, to = 0, nrows = LENGTH(arcs) / 2, dims = LENGTH(nodes);
int *a = NULL, *coords = NULL, *poset = NULL;
double *w = NULL;
SEXP weights2, amat, try, acyclic;

  /* allocate and initialize the adjacency matrix. */
  PROTECT(amat = allocMatrix(INTSXP, dims, dims));
  a = INTEGER(amat);
  memset(a, '\0', sizeof(int) * dims * dims);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  /* duplicate the weights to preserve the oroginal ones. */
  PROTECT(weights2 = duplicate(weights));
  w = REAL(weights2);

  /* sort the strength coefficients. */
  poset = alloc1dcont(nrows);
  for (k = 0; k < nrows; k++)
    poset[k] = k;
  R_qsort_I(w, poset, 1, nrows);

  /* iterate over the arcs in reverse order wrt their strength coefficients. */
  for (k = 0; k < nrows; k++) {

    from = coords[poset[k]] - 1;
    to = coords[poset[k] + nrows] - 1;

    /* add an arc only if it does not introduce cycles. */
    if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, FALSE))
      a[CMC(from, to, dims)] = 1;
    else
      warning("arc %s -> %s would introduce cycles in the graph, ignoring.",
        NODE(from), NODE(to));

  }/*FOR*/

  /* convert the adjacency matrix back to an arc set and return it. */
  acyclic = amat2arcs(amat, nodes);

  UNPROTECT(3);

  return acyclic;

}/*SMART_NETWORK_AVERAGING*/
SEXP cdlik(SEXP x, SEXP y) {

int i = 0, j = 0, k = 0;
int **n = NULL, *nj = NULL;
int llx = NLEVELS(x), lly = NLEVELS(y), num = LENGTH(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc2dcont(llx, lly);
  nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < num; k++) {

    n[xx[k] - 1][yy[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

      nj[j] += n[i][j];

    }/*FOR*/

  /* compute the conditional entropy from the joint and marginal
       frequencies. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++) {

      if (n[i][j] != 0)
        *res += (double)n[i][j] * log((double)n[i][j] / (double)nj[j]);

    }/*FOR*/

  UNPROTECT(1);

  return result;

}/*CDLIK*/
Exemplo n.º 9
0
/* unconditional discrete sampling. */
void rbn_discrete_root(SEXP result, int cur, SEXP cpt, int *num, int ordinal,
    SEXP fixed) {

int np = LENGTH(cpt), *gen = NULL, *workplace = NULL;
double *p = NULL;
SEXP generated, class, lvls;

  /* get the levels of the curent variable .*/
  lvls = VECTOR_ELT(getAttrib(cpt, R_DimNamesSymbol), 0);
  /* allocate the memory for the generated observations. */
  PROTECT(generated = allocVector(INTSXP, *num));
  gen = INTEGER(generated);

  if (fixed != R_NilValue) {

    int constant = INTEGER(match(lvls, fixed, 0))[0];

    for (int i = 0; i < *num; i++)
      gen[i] = constant;

  }/*THEN*/
  else {

    workplace = alloc1dcont(np);

    /* duplicate the probability table to save the original copy from tampering. */
    p = alloc1dreal(np);
    memcpy(p, REAL(cpt), np * sizeof(double));

    /* perform the random sampling. */
    ProbSampleReplace(np, p, workplace, *num, gen);

  }/*ELSE*/

  /* set up all the attributes for the newly generated observations. */
  setAttrib(generated, R_LevelsSymbol, lvls);
  if (ordinal) {

    PROTECT(class = allocVector(STRSXP, 2));
    SET_STRING_ELT(class, 0, mkChar("ordered"));
    SET_STRING_ELT(class, 1, mkChar("factor"));

  }/*THEN*/
  else {
Exemplo n.º 10
0
/* conditional mutual information, to be used in C code. */
double c_cmi(int *xx, int *llx, int *yy, int *lly, int *zz, int *llz, int *num) {

int i = 0, j = 0, k = 0;
int ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL;
double res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc3dcont(*llx, *lly, *llz);
  ni = alloc2dcont(*llx, *llz);
  nj = alloc2dcont(*lly, *llz);
  nk = alloc1dcont(*llz);

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++;

  }/*FOR*/

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++) {

        ni[i][k] += n[i][j][k];
        nj[j][k] += n[i][j][k];
        nk[k] += n[i][j][k];

      }/*FOR*/

  /* compute the conditional mutual information from the joint and
     marginal frequencies. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++)
        res += MI_PART(n[i][j][k], ni[i][k], nj[j][k], nk[k]);

  res = res/(*num);

  return res;

}/*C_CMI*/
Exemplo n.º 11
0
/* compute the cached values for a single node (R-friendly). */
SEXP cache_partial_structure(SEXP nodes, SEXP target, SEXP amat, SEXP debug) {

int i = 0, debuglevel = LOGICAL(debug)[0], length_nodes = LENGTH(nodes);
char *t = (char *)CHAR(STRING_ELT(target, 0));
int *status = NULL, *a = INTEGER(amat);

  if (isTRUE(debug))
    Rprintf("* (re)building cached information about node %s.\n", t);

  /* allocate and initialize the status vector. */
  status = alloc1dcont(length_nodes);

  /* iterate fo find the node position in the array.  */
  for (i = 0; i < length_nodes; i++)
    if (!strcmp(t, CHAR(STRING_ELT(nodes, i))))
      break;

  /* return the corresponding part of the bn structure. */
  return cache_node_structure(i, nodes, a, length_nodes, status, debuglevel);

}/*CACHE_PARTIAL_STRUCTURE*/
Exemplo n.º 12
0
/* predict the values of one or more variables given one or more variables by
 * maximum a posteriori (MAP). */
SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) {

int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0;
int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug);
void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL;
SEXP result, colnames, evidence, evmatch, temp = R_NilValue;
SEXP cpdist, predicted, lvls = R_NilValue;
double *wgt = NULL;
long double *lvls_counts = NULL;

  /* extract the names of the variables in the data. */
  colnames = getAttrib(data, R_NamesSymbol);

  /* remove the name of the variable to predict. */
  nev = length(from);
  PROTECT(evmatch = match(colnames, from, 0));

  /* cache variable types and pointers. */
  vartypes = alloc1dcont(nev);
  varptrs = alloc1dpointer(nev);
  for (j = 0, k = 0; j < nev; j++) {

    temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1);
    vartypes[k] = TYPEOF(temp);
    varptrs[k++] = DATAPTR(temp);

  }/*FOR*/

  /* cache the sample size. */
  nobs = length(temp);

  /* allocate a list to hold the evidence. */
  PROTECT(evidence = allocVector(VECSXP, nev));
  setAttrib(evidence, R_NamesSymbol, from);

  /* cache pointers to the elements of the evidence .*/
  evptrs = alloc1dpointer(nev);

  for (j = 0; j < nev; j++) {

    PROTECT(temp = allocVector(vartypes[j], 1));
    evptrs[j] = DATAPTR(temp);
    SET_VECTOR_ELT(evidence, j, temp);
    UNPROTECT(1);

  }/*FOR*/

  /* make the evidence a data frame to compact debugging output. */
  minimal_data_frame(evidence);

  /* allocate the return value. */
  PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs));
  res = DATAPTR(result);

  /* in the case of discrete variables, allocate scratch space for levels'
   * frequencies. */
  if (TYPEOF(result) == INTSXP) {

    lvls = getAttrib(result, R_LevelsSymbol);
    nlvls = length(lvls);
    lvls_counts = allocldouble(nlvls);

  }/*THEN*/

  /* allocate the weights. */
  wgt = alloc1dreal(nsims);

  /* allocate sratch space for the random samplings. */
  PROTECT(cpdist = fit2df(fitted, nsims));
  predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0)));
  pred = DATAPTR(predicted);

  /* iterate over the observations. */
  for (i = 0; i < nobs; i++) {

    /* copy the values into the list. */
    for (j = 0; j < nev; j++) {

      switch(vartypes[j]) {

        case REALSXP:

          *((double *)evptrs[j]) = ((double *)varptrs[j])[i];
          break;

        case INTSXP:

          *((int *)evptrs[j]) = ((int *)varptrs[j])[i];
          break;

      }/*SWITCH*/

    }/*FOR*/

    if (debuglevel > 0) {

      Rprintf("* predicting observation %d conditional on:\n", i);
      PrintValue(evidence);

    }/*THEN*/

    /* generate samples from the conditional posterior distribution. */
    c_rbn_master(fitted, cpdist, n, evidence, FALSE);
    /* compute the weights. */
    c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE);

    /* compute the posterior estimate. */
    switch(TYPEOF(predicted)) {

      case REALSXP:

        /* average the predicted values. */
        ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims,
                               debuglevel);
        break;

      case INTSXP:

        /* pick the most frequent value. */
        ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts,
                            lvls, nlvls, debuglevel);
        break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(4);

  return result;

}/*MAPPRED*/
Exemplo n.º 13
0
SEXP tiers(SEXP nodes, SEXP debug) {

int i = 0, j = 0, k = 0, narcs = 0, nnodes = 0, ntiers = LENGTH(nodes);
int *tier_size = NULL, *debuglevel = LOGICAL(debug), tier_start = 0, cur = 0;
SEXP flattened, blacklist, temp;

  /* allocate the counters for tiers' sizes.*/
  tier_size = alloc1dcont(ntiers);

  if (!isString(nodes)) {

    /* "node" is a list, each tier is an element. */
    for (i = ntiers - 1; i >= 0; i--) {

      temp = VECTOR_ELT(nodes, i);
      tier_size[i] = LENGTH(temp);
      nnodes += tier_size[i];
      narcs += (nnodes - tier_size[i]) * tier_size[i];

    }/*FOR*/

    /* flatten the tiers to keep manipulation later on simple. */
    PROTECT(flattened = allocVector(STRSXP, nnodes));

    for (i = 0, k = 0; i < ntiers; i++) {

      temp = VECTOR_ELT(nodes, i);

      for (j = 0; j < tier_size[i]; j++)
        SET_STRING_ELT(flattened, k++, STRING_ELT(temp, j));

    }/*FOR*/

  }/*THEN*/
  else {

    /* "node" is a character vector, which means that each node is in its own tier
     * and that there is no need to flatted it. */
    flattened = nodes;
    nnodes = LENGTH(nodes);
    for (i = 0; i < ntiers; i++)
      tier_size[i] = 1;

    /* the blacklist is the one resulting from a complete node ordering. */
    narcs = ntiers * (ntiers - 1) / 2;

  }/*ELSE*/

  /* allocate the return value. */
  PROTECT(blacklist = allocMatrix(STRSXP, narcs, 2));

  for (k = 0, i = 0; k < nnodes; k++) {

    temp = STRING_ELT(flattened, k);

    if (*debuglevel > 0)
      Rprintf("* current node is %s in tier %d.\n", CHAR(temp), i + 1);

    for (j = tier_start + tier_size[i]; j < nnodes; j++) {

      if (*debuglevel)
        Rprintf("  > blacklisting %s -> %s\n", CHAR(STRING_ELT(flattened, j)), CHAR(temp));

      SET_STRING_ELT(blacklist, cur, STRING_ELT(flattened, j));
      SET_STRING_ELT(blacklist, cur + narcs, temp);
      cur++;

    }/*FOR*/

    if (k >= tier_start + tier_size[i] - 1)
      tier_start += tier_size[i++];

    if (i == ntiers)
      break;

  }

  /* allocate, initialize and set the column names. */
  finalize_arcs(blacklist);

  if (!isString(nodes))
    UNPROTECT(2);
  else
    UNPROTECT(1);

  return blacklist;

}/*TIERS*/
Exemplo n.º 14
0
/* conditional posterior dirichlet probability (covers BDe and K2 scores). */
SEXP cdpost(SEXP x, SEXP y, SEXP iss, SEXP exp, SEXP nparams) {

int i = 0, j = 0, k = 0, imaginary = 0, num = LENGTH(x);
int llx = NLEVELS(x), lly = NLEVELS(y), *xx = INTEGER(x), *yy = INTEGER(y);
int **n = NULL, *nj = NULL;
double alpha = 0, *res = NULL, *p = REAL(nparams);
SEXP result;

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = (int) *p;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INT(iss);
    alpha = (double) imaginary / *p;

  }/*ELSE*/

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table. */
  n = alloc2dcont(llx, lly);
  nj = alloc1dcont(lly);

  /* compute the joint frequency of x and y. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++) {

      n[xx[i] - 1][yy[i] - 1]++;
      nj[yy[i] - 1]++;

    }/*FOR*/

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {

      if (i != e[k] - 1) {

        n[xx[i] - 1][yy[i] - 1]++;
        nj[yy[i] - 1]++;

      }/*THEN*/
      else {

        k++;

      }/*ELSE*/

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= LENGTH(exp);

  }/*ELSE*/

  /* compute the conditional posterior probability. */
  for (i = 0; i < llx; i++)
    for (j = 0; j < lly; j++)
      *res += lgammafn(n[i][j] + alpha) - lgammafn(alpha);
  for (j = 0; j < lly; j++)
    *res += lgammafn((double)imaginary / lly) -
              lgammafn(nj[j] + (double)imaginary / lly);

  UNPROTECT(1);
  return result;

}/*CDPOST*/
Exemplo n.º 15
0
/* posterior dirichlet probability (covers BDe and K2 scores). */
SEXP dpost(SEXP x, SEXP iss, SEXP exp) {

int i = 0, k = 0, num = LENGTH(x);
int llx = NLEVELS(x), *xx = INTEGER(x), *n = NULL, *imaginary = NULL;
double alpha = 0, *res = NULL;
SEXP result;

  /* the correct vaules for the hyperparameters alpha are documented in
   * "Learning Bayesian Networks: The Combination of Knowledge and Statistical
   * Data" by Heckerman, Geiger & Chickering (1995), page 17. */

  if (isNull(iss)) {

    /* this is for K2, which does not define an imaginary sample size;
     * all hyperparameters are set to 1 in the prior distribution. */
    imaginary = &llx;
    alpha = 1;

  }/*THEN*/
  else {

    /* this is for the BDe score. */
    imaginary = INTEGER(iss);
    alpha = (double) *imaginary / (double) llx;

  }/*ELSE*/

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table. */
  n = alloc1dcont(llx);

  /* compute the frequency table of x, disregarding experimental data. */
  if (exp == R_NilValue) {

    for (i = 0; i < num; i++)
      n[xx[i] - 1]++;

  }/*THEN*/
  else {

    int *e = INTEGER(exp);

    for (i = 0, k = 0; i < num; i++) {
      if (i != e[k] - 1)
        n[xx[i] - 1]++;
      else
        k++;

    }/*FOR*/

    /* adjust the sample size to match the number of observational data. */
   num -= LENGTH(exp);

  }/*ELSE*/

  /* compute the posterior probability. */
  for (i = 0; i < llx; i++)
    *res += lgammafn(n[i] + alpha) - lgammafn(alpha);
  *res += lgammafn((double)(*imaginary)) -
            lgammafn((double)(*imaginary + num));

  UNPROTECT(1);
  return result;

}/*DPOST*/
Exemplo n.º 16
0
/* conditional Monte Carlo simulation for discrete tests. */
SEXP cmcarlo_mean(SEXP x, SEXP y, SEXP z, SEXP lx, SEXP ly, SEXP lz,
    SEXP length, SEXP samples, SEXP test) {

double *fact = NULL, *res = NULL, observed = 0;
int **n = NULL, **ncolt = NULL, **nrowt = NULL, *ncond = NULL, *workspace = NULL;
int *num = INTEGER(length), *B = INTEGER(samples);
int *nr = INTEGER(lx), *nc = INTEGER(ly), *nl = INTEGER(lz);
int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z);
int i = 0, j = 0, k = 0, npermuts = 0;
SEXP result;

  /* allocate and initialize the result */
  PROTECT(result = allocVector(REALSXP, 3));
  res = REAL(result);
  res[0] = res[1] = res[2] = 0; // initial test score / mean score / nb permutations

  /* allocate and compute the factorials needed by rcont2. */
  allocfact(*num);

  /* allocate and initialize the workspace for rcont2. */
  workspace = alloc1dcont(*nc);

  /* initialize the contingency table. */
  n = alloc2dcont(*nl, (*nr) * (*nc));

  /* initialize the marginal frequencies. */
  nrowt = alloc2dcont(*nl, *nr);
  ncolt = alloc2dcont(*nl, *nc);
  ncond = alloc1dcont(*nl);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++)
    n[zz[k] - 1][CMC(xx[k] - 1, yy[k] - 1, *nr)]++;

  /* compute the marginals. */
  for (i = 0; i < *nr; i++)
    for (j = 0; j < *nc; j++)
      for (k = 0; k < *nl; k++) {

        nrowt[k][i] += n[k][CMC(i, j, *nr)];
        ncolt[k][j] += n[k][CMC(i, j, *nr)];
        ncond[k] += n[k][CMC(i, j, *nr)];

      }/*FOR*/

  /* initialize the random number generator. */
  GetRNGstate();
  
  /* pick up the observed value of the test statistic, then generate a set of
     random contingency tables (given row and column totals) and adds their
     test scores to compute the mean.*/
  switch(INT(test)) {

    case MUTUAL_INFORMATION:
      observed = 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl);
      
      for (j = 0; j < *B; j++) {
      
        for (k = 0; k < *nl; k++)
          rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]);
          
        res[1] += 2 * _cmi(n, nrowt, ncolt, ncond, nr, nc, nl);
        npermuts++;
      }

      break;

    case PEARSON_X2:
      observed = _cx2(n, nrowt, ncolt, ncond, nr, nc, nl);
      
      for (j = 0; j < *B; j++) {
      
        for (k = 0; k < *nl; k++)
          rcont2(nr, nc, nrowt[k], ncolt[k], &(ncond[k]), fact, workspace, n[k]);
          
        res[1] += _cx2(n, nrowt, ncolt, ncond, nr, nc, nl);
        npermuts++;
      }

      break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed and mean values of the statistic,
     and the number of permutations performed. */
  res[0] = observed;
  res[1] /= *B; // mean
  res[2] = npermuts;

  UNPROTECT(1);

  return result;

}/*CMCARLO_MEAN*/
Exemplo n.º 17
0
/* predict the value of a discrete node with one or more parents. */
SEXP cdpred(SEXP fitted, SEXP data, SEXP parents, SEXP debug) {

int i = 0, k = 0, ndata = LENGTH(data), nrows = 0, ncols = 0;
int *configs = INTEGER(parents), *debuglevel = LOGICAL(debug);
int *iscratch = NULL, *maxima = NULL, *nmax = NULL, *res = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP temp, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  temp = getListElement(fitted, "prob");
  nrows = INT(getAttrib(temp, R_DimSymbol));
  ncols = LENGTH(temp) / nrows;
  prob = REAL(temp);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(nrows);

  /* create a scratch copy of the array. */
  buf = alloc1dreal(nrows);
  dscratch = alloc1dreal(nrows * ncols);
  memcpy(dscratch, prob, nrows * ncols * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(nrows * ncols);

  /* allocate the maxima counters. */
  nmax = alloc1dcont(ncols);

  /* get the mode for each configuration. */
  for (i = 0; i < ncols; i++) {

    /* initialize the vector of indexes. */
    for (k = 0; k < nrows; k++)
      iscratch[k] = k + 1;

    /* find out the mode(s). */
    all_max(dscratch + i * nrows, nrows, maxima + i * nrows,
      nmax + i, iscratch, buf);

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* copy the index of the mode in the return value. */
  for (i = 0; i < ndata; i++) {

    if (nmax[configs[i] - 1] == 1) {

      res[i] = maxima[CMC(0, configs[i] - 1, nrows)];

      if (*debuglevel > 0) {

        if (res[i] == NA_INTEGER)
          Rprintf("  > prediction for observation %d is NA with probabilities:\n");
        else
          Rprintf("  > prediction for observation %d is '%s' with probabilities:\n",
            i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (int k = 0; k < nrows; k++)
          Rprintf("  %lf", (prob + nrows * (configs[i] - 1))[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax[configs[i] - 1], res + i, maxima + (configs[i] - 1) * nrows);

      if (*debuglevel > 0) {

        Rprintf("  > there are %d levels tied for prediction of observation %d, applying tie breaking.\n",
          nmax[configs[i] - 1], i + 1);
        Rprintf("  > tied levels are:");
        for (k = 0; k < nmax[configs[i] - 1]; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[CMC(k, configs[i] - 1, nrows)] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*CDPRED*/
Exemplo n.º 18
0
/* set the directions of the arcs in a tree given the root node. */
SEXP tree_directions(SEXP arcs, SEXP nodes, SEXP root, SEXP debug) {

int i = 0, j = 0, d = 0, traversed = 1;
int narcs = LENGTH(arcs)/2, nnodes = LENGTH(nodes);
int *a = NULL, *depth = 0, *debuglevel = LOGICAL(debug);
SEXP try, try2, result;

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  a = INTEGER(try);

  /* match the root node. */
  PROTECT(try2 = match(nodes, root, 0));

  /* allocate and initialize the statust vector. */
  depth = alloc1dcont(nnodes);
  depth[INT(try2) - 1] = 1;

  if (*debuglevel > 0)
    Rprintf("> root node (depth 1) is %s.\n", NODE(INT(try2) - 1));

  for (d = 1; d <= nnodes; d++) {

    if (*debuglevel > 0)
      Rprintf("> considering nodes at depth %d.\n", d + 1);

    for (i = 0; i < narcs; i++) {

      for (j = 0; j < nnodes; j++) {

        /* disregard nodes at the wrong depth. */
        if (depth[j] != d)
          continue;

        if ((a[i + narcs] == (j + 1)) && (depth[a[i] - 1] == 0)) {

          if (*debuglevel > 0)
            Rprintf("  * found node %s.\n", NODE(a[i] - 1));

          /* save the depth at which the node was found. */
          depth[a[i] - 1] = d + 1;

          /* update the counter of the traversed nodes. */
          traversed++;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

    /* check whether all nodes have been traversed. */
    if (traversed == nnodes)
      break;

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs/2, 2));

  for (i = 0, j = 0; i < narcs; i++) {

    if (depth[a[i] - 1] < depth[a[i + narcs] - 1]) {

      SET_STRING_ELT(result, j, STRING_ELT(arcs, i));
      SET_STRING_ELT(result, j + narcs/2, STRING_ELT(arcs, i + narcs));
      j++;

    }/*THEN*/

  }/*FOR*/

  UNPROTECT(3);

  return result;

}/*TREE_DIRECTIONS*/
Exemplo n.º 19
0
SEXP score_cache_fill(SEXP nodes, SEXP data, SEXP network, SEXP score,
    SEXP extra, SEXP reference, SEXP equivalence, SEXP decomposability,
    SEXP updated, SEXP amat, SEXP cache, SEXP blmat, SEXP debug) {

int *colsum = NULL, nnodes = length(nodes), lupd = length(updated);
int *a = NULL, *upd = NULL, *b = NULL, debuglevel = isTRUE(debug);
int i = 0, j = 0, k = 0;
double *cache_value = NULL;
void *p = NULL;
SEXP arc, delta, op, temp;

  /* save a pointer to the adjacency matrix, the blacklist and the
   * updated nodes. */
  a = INTEGER(amat);
  b = INTEGER(blmat);
  upd = INTEGER(updated);

  /* if there are no nodes to update, return. */
  if (lupd == 0) return cache;

  /* set up row and column total to check for score equivalence;
   * zero means no parent nodes. */
  if (isTRUE(equivalence)) {

    colsum = alloc1dcont(nnodes);

    for (i = 0; i < nnodes; i++)
      for (j = 0; j < nnodes; j++)
        colsum[j] += a[CMC(i, j, nnodes)];

  }/*THEN*/

  /* allocate and initialize the cache. */
  cache_value = REAL(cache);

  /* allocate a two-slot character vector. */
  PROTECT(arc = allocVector(STRSXP, 2));

  /* allocate and initialize the fake score delta. */
  PROTECT(delta = ScalarReal(0));

  /* allocate and initialize the score.delta() operator. */
  PROTECT(op = mkString("set"));

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

       /* incident nodes must be different from each other. */
       if (i == j) continue;

       /* if only one or two nodes' caches need updating, skip the rest. */
       for (k = 0; k < lupd; k++)
         if (upd[k] == j)
           goto there;

       continue;

there:

       /* no need to compute the score delta for blacklisted arcs. */
       if (b[CMC(i, j, nnodes)] == 1)
         continue;

       /* use score equivalence if possible to check only one orientation. */
       if (isTRUE(equivalence)) {

         /* if the following conditions are met, look up the score delta of
          * the reverse of the current arc:
          *   1) that score delta has already been computed.
          *   2) both incident nodes have no parent, so the arc is really
          *      score equivalent (no v-structures).
          *   3) the reversed arc has not been blacklisted, as the score delta
          *      is not computed in this case. */
         if ((i > j) && (colsum[i] + colsum[j] == 0) && (b[CMC(j, i, nnodes)] == 0)) {

           cache_value[CMC(i, j, nnodes)] = cache_value[CMC(j, i, nnodes)];
           continue;

         }/*THEN*/

       }/*THEN*/

       /* save the nodes incident on the arc. */
       SET_STRING_ELT(arc, 0, STRING_ELT(nodes, i));
       SET_STRING_ELT(arc, 1, STRING_ELT(nodes, j));

       /* if the arc is not present in the graph it should be added;
        * otherwise it should be removed. */
       if (a[CMC(i, j, nnodes)] == 0)
         SET_STRING_ELT(op, 0, mkChar("set"));
       else
         SET_STRING_ELT(op, 0, mkChar("drop"));

       /* checkpoint allocated memory. */
       p = vmaxget();
       /* evaluate the call to score.delta() for the arc. */
       PROTECT(temp = score_delta(arc, network, data, score, delta, reference,
         op, extra, decomposability));
       vmaxset(p);

       cache_value[CMC(i, j, nnodes)] = NUM(VECTOR_ELT(temp, 1));
       UNPROTECT(1);

       if (debuglevel > 0)
         Rprintf("* caching score delta for arc %s -> %s (%lf).\n",
           CHAR(STRING_ELT(nodes, i)), CHAR(STRING_ELT(nodes, j)),
            cache_value[CMC(i, j, nnodes)]);

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(3);
  return cache;

}/*HC_CACHE_FILL*/
Exemplo n.º 20
0
/* predict the value of a discrete node without parents. */
SEXP dpred(SEXP fitted, SEXP data, SEXP debug) {

int i = 0, nmax = 0, ndata = LENGTH(data), length = 0;
int *res = NULL, *debuglevel = LOGICAL(debug), *iscratch = NULL, *maxima = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP ptab, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  ptab = getListElement(fitted, "prob");
  length = LENGTH(ptab);
  prob = REAL(ptab);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(length);
  for (i = 0; i < length; i++)
    iscratch[i] = i + 1;

  /* create a scratch copy of the array. */
  buf = alloc1dreal(length);
  dscratch = alloc1dreal(length);
  memcpy(dscratch, prob, length * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(length);

  /* find out the mode(s). */
  all_max(dscratch, length, maxima, &nmax, iscratch, buf);

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  if (nmax == 1) {

    /* copy the index of the mode in the return value. */
    for (i = 0; i < ndata; i++)
      res[i] = maxima[0];

    if (*debuglevel > 0) {

      if (res[0] == NA_INTEGER)
        Rprintf("  > prediction for all observations is NA with probabilities:\n");
      else
        Rprintf("  > prediction for all observations is '%s' with probabilities:\n",
          CHAR(STRING_ELT(tr_levels, res[0] - 1)));

      Rprintf("  ");
      for (i = 0; i < LENGTH(ptab); i++)
        Rprintf("  %lf", prob[i]);
      Rprintf("\n");

    }/*THEN*/

  }/*THEN*/
  else {

    /* break ties: sample with replacement from all the maxima. */
    GetRNGstate();
    SampleReplace(ndata, nmax, res, maxima);
    PutRNGstate();

    if (*debuglevel > 0) {

      Rprintf("  > there are %d levels tied for prediction, applying tie breaking.\n", nmax);
      Rprintf("  > tied levels are:");
      for (i = 0; i < nmax; i++)
        Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[i] - 1)));
      Rprintf(".\n");

    }/*THEN*/

  }/*ELSE*/

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*DPRED*/
Exemplo n.º 21
0
/* unconditional Monte Carlo simulation for correlation-based tests. */
SEXP gauss_mcarlo(SEXP x, SEXP y, SEXP samples, SEXP test, SEXP alpha) {

int j = 0, k = 0, num = LENGTH(x), *B = INTEGER(samples);
double *xx = REAL(x), *yy = REAL(y), *yperm = NULL, *res = NULL;
double observed = 0, enough = ceil(NUM(alpha) * (*B)) + 1, xm = 0, ym = 0;
int *perm = NULL, *work = NULL;
SEXP result;

  /* allocate the arrays needed by RandomPermutation. */
  perm = alloc1dcont(num);
  work = alloc1dcont(num);

  /* allocate the array for the pemutations. */
  yperm = alloc1dreal(num);

  /* cache the means of the two variables (they are invariant under permutation). */
  for (j = 0; j < num; j++) {

    xm += xx[j];
    ym += yy[j];

  }/*FOR*/

  xm /= num;
  ym /= num;

  /* allocate the result. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random permutations (all variable but the second are fixed) and check how
     many tests are greater (in absolute value) than the original one.*/
  switch(INT(test)) {

    case GAUSSIAN_MUTUAL_INFORMATION:
    case LINEAR_CORRELATION:
    case FISHER_Z:
      observed = _cov(xx, yy, &xm, &ym, &num);

      for (j = 0; j < *B; j++) {

        RandomPermutation(num, perm, work);

        for (k = 0; k < num; k++)
          yperm[k] = yy[perm[k]];

        if (fabs(_cov(xx, yperm, &xm, &ym, &num)) > fabs(observed)) {

          sequential_counter_check(*res);

        }/*THEN*/

      }/*FOR*/

    break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed p-value. */
  *res /= *B;

  UNPROTECT(1);

  return result;

}/*GAUSS_MCARLO*/
Exemplo n.º 22
0
/* conditional Monte Carlo simulation for correlation-based tests. */
SEXP gauss_cmcarlo(SEXP data, SEXP length, SEXP samples, SEXP test, SEXP alpha) {

int j = 0, k = 0, ncols = LENGTH(data), errcode = 0, *work = NULL, *perm = NULL;
int error_counter = 0, *B = INTEGER(samples), *num = INTEGER(length);
double observed = 0, permuted = 0, *yperm = NULL, *yorig = NULL, *res = NULL;
double enough = ceil(NUM(alpha) * (*B)) + 1;
double **column = NULL, *mean = NULL, *covariance = NULL, *covariance_backup = NULL;
double *u = NULL, *d = NULL, *vt = NULL;
SEXP result;

  /* allocate the matrices needed for the SVD decomposition. */
  u = alloc1dreal(ncols * ncols);
  d = alloc1dreal(ncols);
  vt = alloc1dreal(ncols * ncols);

  /* allocate and initialize the result. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* allocate and initialize an array of pointers for the variables. */
  column = (double **) alloc1dpointer(ncols);
  for (j = 0; j < ncols; j++)
    column[j] = REAL(VECTOR_ELT(data, j));

  /* cache the means of the variables (they are invariant under permutation). */
  mean = alloc1dreal(ncols);

  /* compute the mean values  */
  for (j = 0; j < ncols; j++) {

    for (k = 0 ; k < *num; k++)
      mean[j] += column[j][k];

    mean[j] /= (*num);

  }/*FOR*/

  /* allocate and initialize the covariance matrix. */
  covariance = alloc1dreal(ncols * ncols);
  covariance_backup = alloc1dreal(ncols * ncols);
  c_covmat(column, mean, &ncols, num, covariance);
  memcpy(covariance_backup, covariance, ncols * ncols * sizeof(double));

  /* substitute the original data with the fake column that will be permuted. */
  yperm = alloc1dreal(*num);
  yorig = column[1];
  memcpy(yperm, yorig, *num * sizeof(double));
  column[1] = yperm;

   /* allocate the arrays needed by RandomPermutation. */
  perm = alloc1dcont(*num);
  work = alloc1dcont(*num);

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random permutations (all variable but the second are fixed) and check how
     many tests are greater (in absolute value) than the original one.*/
  switch(INT(test)) {

    case GAUSSIAN_MUTUAL_INFORMATION:
    case LINEAR_CORRELATION:
    case FISHER_Z:
      observed = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

      if (errcode)
        error("an error (%d) occurred in the call to dgesvd().\n", errcode);

      for (j = 0; j < (*B); j++) {

        /* reset the error flag of the SVD Fortran routine. */
        errcode = 0;

        RandomPermutation(*num, perm, work);

        for (k = 0; k < *num; k++)
          yperm[k] = yorig[perm[k]];

        /* restore the covariance matrix from the good copy. */
        memcpy(covariance, covariance_backup, ncols * ncols * sizeof(double));
        /* update the relevant covariances. */
        c_update_covmat(column, mean, 1, &ncols, num, covariance);

        permuted = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

        if (errcode != 0)
          error_counter++;

        if (fabs(permuted) > fabs(observed)) {

          sequential_counter_check(*res);

        }/*THEN*/

      }/*FOR*/

    if (error_counter > 0)
      warning("unable to compute %d permutations due to errors in dgesvd().\n",
        error_counter);

    break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed p-value. */
  *res /= *B;

  UNPROTECT(1);

  return result;

}/*GAUSS_CMCARLO*/
Exemplo n.º 23
0
/* convert an arc set to a weighted edge list. */
SEXP arcs2welist(SEXP arcs, SEXP nodes, SEXP weights, SEXP nid, SEXP sublist,
    SEXP parents) {

int i = 0, j = 0, k = 0, nnodes = length(nodes), narcs = length(arcs)/2;
int *e = NULL, *coords = NULL, *adjacent = NULL;
int num_id = isTRUE(nid), sub = isTRUE(sublist), up = isTRUE(parents);
double *w = REAL(weights), *ew = NULL;
SEXP try, elist, edges, edge_weights, temp, temp_name = R_NilValue;

  /* allocate the return value. */
  PROTECT(elist = allocVector(VECSXP, nnodes));
  /* set the node names. */
  setAttrib(elist, R_NamesSymbol, nodes);

  /* allocate and initialize the subset name. */
  if (sub > 0)
    PROTECT(temp_name = mkStringVec(2, "edges", "weight"));

  /* allocate the scratch space to keep track adjacent nodes. */
  adjacent = alloc1dcont(nnodes);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  for (i = 0; i < narcs; i++)
    adjacent[coords[i + up * narcs] - 1]++;

  for (i = 0; i < nnodes; i++) {

    /* allocate and set up the edge array. */
    if (num_id > 0) {

      PROTECT(edges = allocVector(INTSXP, adjacent[i]));
      e = INTEGER(edges);

    }/*THEN*/
    else {

      PROTECT(edges = allocVector(STRSXP, adjacent[i]));

    }/*ELSE*/

    /* allocate and set up the weights array. */
    PROTECT(edge_weights = allocVector(REALSXP, adjacent[i]));
    ew = REAL(edge_weights);

    /* copy the coordinates or the labels of adjacent nodes. */
    for (j = 0, k = 0; j < narcs; j++) {

      if (coords[j + up * narcs] != i + 1)
        continue;

      /* copy the weight as well. */
      ew[k] = w[j];

      if (num_id > 0)
        e[k++] = coords[(1 - up) * narcs + j];
      else
        SET_STRING_ELT(edges, k++, STRING_ELT(arcs, (1 - up) * narcs + j));

      if (k == adjacent[i])
        break;

    }/*FOR*/

    if (sub > 0) {

      /* allocate and set up the "edge" sublist for graphNEL. */
      PROTECT(temp = allocVector(VECSXP, 2));
      setAttrib(temp, R_NamesSymbol, temp_name);
      SET_VECTOR_ELT(temp, 0, edges);
      SET_VECTOR_ELT(temp, 1, edge_weights);
      SET_VECTOR_ELT(elist, i, temp);
      UNPROTECT(1);

    }/*THEN*/
    else {

      /* save weights with edges as names. */
      setAttrib(edge_weights, R_NamesSymbol, edges);
      SET_VECTOR_ELT(elist, i, edge_weights);

    }/*ELSE*/

    UNPROTECT(2);

  }/*FOR*/

  if (sub > 0)
    UNPROTECT(3);
  else
    UNPROTECT(2);

  return elist;

}/*ARCS2WELIST*/
Exemplo n.º 24
0
/* convert an arc set to an edge list. */
SEXP arcs2elist(SEXP arcs, SEXP nodes, SEXP id, SEXP sublist) {

int i = 0, j = 0, k = 0, nnodes = LENGTH(nodes), narcs = LENGTH(arcs)/2;
int *e = NULL, *coords = NULL, *children = NULL;
int *convert = LOGICAL(id), *sub = LOGICAL(sublist);
SEXP try, elist, edges, temp, temp_name = R_NilValue;

  /* allocate the return value. */
  PROTECT(elist = allocVector(VECSXP, nnodes));
  /* set the node names. */
  setAttrib(elist, R_NamesSymbol, nodes);

  if (*sub > 0) {

    /* allocate and initialize the subset name. */
    PROTECT(temp_name = allocVector(STRSXP, 1));
    SET_STRING_ELT(temp_name, 0, mkChar("edges"));

  }/*THEN*/

  /* allocate the scratch space to keep track of the children of each node. */
  children = alloc1dcont(nnodes);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  for (i = 0; i < narcs; i++) {

    children[coords[i] - 1]++;

  }/*FOR*/

  for (i = 0; i < nnodes; i++) {

    /* allocate and set up the edge array. */
    if (*convert > 0) {

      PROTECT(edges = allocVector(INTSXP, children[i]));
      e = INTEGER(edges);

    }/*THEN*/
    else {

      PROTECT(edges = allocVector(STRSXP, children[i]));

    }/*ELSE*/

    /* copy the coordinates of the adjacent nodes. */
    for (j = 0, k = 0; j < narcs; j++) {

      if (coords[j] != i + 1)
        continue;

      if (*convert > 0)
        e[k++] = coords[narcs + j];
      else
        SET_STRING_ELT(edges, k++, STRING_ELT(arcs, narcs + j));

      if (k == children[i])
        break;

    }/*FOR*/

    if (*sub > 0) {

      /* allocate and set up the "edge" sublist for graphNEL. */
      PROTECT(temp = allocVector(VECSXP, 1));
      setAttrib(temp, R_NamesSymbol, temp_name);
      SET_VECTOR_ELT(temp, 0, edges);
      SET_VECTOR_ELT(elist, i, temp);
      UNPROTECT(1);

    }/*THEN*/
    else {

      SET_VECTOR_ELT(elist, i, edges);

    }/*ELSE*/

    UNPROTECT(1);

  }/*FOR*/

  if (*sub > 0)
    UNPROTECT(3);
  else
    UNPROTECT(2);

  return elist;

}/*ARCS2ELIST*/
Exemplo n.º 25
0
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, 
    SEXP directed, SEXP debug) {

int i = 0, j = 0, z = 0;
int nrows = LENGTH(nodes);
int check_status = nrows, check_status_old = nrows;
int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL;
int *debuglevel = NULL;
short int *status = NULL;
SEXP amat;

  /* dereference the debug parameter. */
  debuglevel = LOGICAL(debug);

  /* build the adjacency matrix from the arc set.  */
  if (*debuglevel > 0)
    Rprintf("* building the adjacency matrix.\n");

  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* should we consider only directed arcs? */
  if (isTRUE(directed)) {

    /* removing undirected arcs, so that only cycles made only of directed
     * arcs will make the function return TRUE. */

    for (i = 0; i < nrows; i++)
      for (j = 0; j < nrows; j++)
        if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1))
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

  }/*THEN*/

  /* initialize the status, {row,col}sums and crossprod arrays. */
  status = allocstatus(nrows);
  rowsums = alloc1dcont(nrows);
  colsums = alloc1dcont(nrows);
  crossprod = alloc1dcont(nrows);

  if (*debuglevel > 0)
    Rprintf("* checking whether the partially directed graph is acyclic.\n");

  /* even in the worst case scenario at least two nodes are marked as
   * good at each iteration, so even ceil(nrows/2) iterations should be
   * enough. */
  for (z = 0; z < nrows; z++) {

start:

    if (*debuglevel > 0)
      Rprintf("* beginning iteration %d.\n", z + 1);

    for (i = 0; i < nrows; i++) {

      /* skip known-good nodes. */
      if (status[i] == GOOD) continue;

      /* reset and update row and column totals. */
      rowsums[i] = colsums[i] = crossprod[i] = 0;

      /* compute row and column totals for the i-th node. */
      for (j = 0; j < nrows; j++) {

        rowsums[i] += a[CMC(i, j, nrows)];
        colsums[i] += a[CMC(j, i, nrows)];
        crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)];

      }/*FOR*/

there:

      if (*debuglevel > 0)
        Rprintf("  > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n",
          NODE(i), rowsums[i], colsums[i], crossprod[i]);

      /* if either total is zero, the node is either a root node or a
       * leaf node, and is not part of any cycle. */
      if (((rowsums[i] == 0) || (colsums[i] == 0)) ||
          ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) {

        if (*debuglevel > 0)
          Rprintf("  @ node %s is cannot be part of a cycle.\n", NODE(i));

        /* update the adjacency matrix and the row/column totals. */
        for (j = 0; j < nrows; j++)
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

        rowsums[i] = colsums[i] = crossprod[i] = 0;

        /* mark the node as good. */
        status[i] = GOOD;
        check_status--;

      }/*THEN*/
      else if (crossprod[i] == 1) {

        /* find the other of the undirected arc. */
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1)
            break;

        /* safety check, just in case. */
        if (i == j) continue;

        if (((colsums[i] == 1) && (colsums[j] == 1)) ||
            ((rowsums[i] == 1) && (rowsums[j] == 1))) {

          if (*debuglevel > 0)
            Rprintf("  @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j));

          /* update the adjacency matrix and the row/column totals. */
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;
          crossprod[i] = 0;
          rowsums[i]--;
          colsums[i]--;
          rowsums[j]--;
          colsums[j]--;

          /* jump back to the first check; if either the row or column total
           * was equal to 1 only because of the undirected arc, the node can
           * now be marked as good. */
          if ((rowsums[i] == 0) || (colsums[i] == 0))
            goto there;

        }/*THEN*/

      }/*THEN*/

    }/*FOR*/

    /* at least three nodes are needed to have a cycle. */
    if (check_status < 3) {

      if (*debuglevel > 0)
        Rprintf("@ at least three nodes are needed to have a cycle.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/

    /* if there are three or more bad nodes and there was no change in
     * the last iteration, the algorithm is stuck on a cycle. */
    if (check_status_old == check_status) {

      if (*debuglevel > 0)
        Rprintf("@ no change in the last iteration.\n");

      /* give up and call c_has_path() to kill some undirected arcs. */
      for (i = 0; i < nrows; i++)
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) {

            /* remove the arc from the adjacency matrix while testing it,
             * there's a path is always found (the arc itself). */
            a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

            if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) &&
               !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) {

              if (*debuglevel > 0)
                Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j));

              /* increase the iteration counter and start again. */
              z++;
              goto start;

            }/*THEN*/
            else {

              /* at least one cycle is really present; give up and return.  */
              UNPROTECT(1);
              return build_return_array(nodes, status, nrows, check_status, return_nodes);

            }/*ELSE*/

          }/*THEN*/

      /* give up if there are no undirected arcs, cycles composed
       * entirely by directed arcs are never false positives. */
      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/
    else {

      check_status_old = check_status;

    }/*ELSE*/

  }/*FOR*/

  UNPROTECT(1);
  return build_return_array(nodes, status, nrows, check_status, return_nodes);

}/*IS_PDAG_ACYCLIC*/
Exemplo n.º 26
0
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children,
                     SEXP debug) {

    int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0;
    int nbeta = LENGTH(VECTOR_ELT(beta, 0));
    int *temp = NULL, *debuglevel = LOGICAL(debug), *aid = INTEGER(VECTOR_ELT(beta, 2));
    double prior = 0, result = 0;
    double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3));
    short int *adjacent = NULL;
    SEXP nodes, try;

    /* get the node labels. */
    nodes = getAttrib(beta, install("nodes"));
    nnodes = LENGTH(nodes);

    /* match the target node. */
    PROTECT(try = match(nodes, target, 0));
    t = INT(try);
    UNPROTECT(1);

    /* find out which nodes are parents and which nodes are children. */
    adjacent = allocstatus(nnodes);

    PROTECT(try = match(nodes, parents, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = PARENT;
    UNPROTECT(1);

    PROTECT(try = match(nodes, children, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = CHILD;
    UNPROTECT(1);

    /* prior probabilities table lookup. */
    for (i = t + 1; i <= nnodes; i++) {

        /* compute the arc id. */
        cur_arc = UPTRI3(t, i, nnodes);

        /* look up the prior probability. */
        for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) {

            /* arcs are ordered, so we can stop early in the lookup. */
            if (aid[k] > cur_arc)
                break;

            if (aid[k] == cur_arc) {

                switch(adjacent[i - 1]) {

                case PARENT:
                    prior = bkwd[k];
                    break;
                case CHILD:
                    prior = fwd[k];
                    break;
                default:
                    prior = 1 - bkwd[k] - fwd[k];

                }/*SWITCH*/

                break;

            }/*THEN*/

        }/*FOR*/

        if (*debuglevel > 0) {

            switch(adjacent[i - 1]) {

            case PARENT:
                Rprintf("  > found arc %s -> %s, prior pobability is %lf.\n",
                        NODE(i - 1), NODE(t - 1), prior);
                break;
            case CHILD:
                Rprintf("  > found arc %s -> %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);
                break;
            default:
                Rprintf("  > no arc between %s and %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);

            }/*SWITCH*/

        }/*THEN*/

        /* move to log-scale and divide by the non-informative log(1/3), so that
         * the contribution of each arc whose prior has not been not specified by
         * the user is zero; overflow is likely otherwise. */
        result += log(prior / ((double)1/3));

    }/*FOR*/

    return result;

}/*CASTELO_PRIOR*/

/* complete a prior as per Castelo & Siebes. */
SEXP castelo_completion(SEXP prior, SEXP nodes)  {

    int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = LENGTH(nodes);
    int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL;
    double *d1 = NULL, *d2 = NULL, *p = NULL;
    SEXP df, arc_id, undirected, a1, a2, match1, match2, prob;
    SEXP result, colnames, from, to, nid, dir1, dir2;

    /* compute numeric IDs for the arcs. */
    a1 = VECTOR_ELT(prior, 0);
    a2 = VECTOR_ELT(prior, 1);
    narcs1 = LENGTH(a1);
    PROTECT(match1 = match(nodes, a1, 0));
    PROTECT(match2 = match(nodes, a2, 0));
    m1 = INTEGER(match1);
    m2 = INTEGER(match2);
    PROTECT(arc_id = allocVector(INTSXP, narcs1));
    aid = INTEGER(arc_id);

    c_arc_hash(&narcs1, &nnodes, m1, m2, aid, NULL, TRUE);

    /* duplicates correspond to undirected arcs. */
    PROTECT(undirected = dupe(arc_id));
    und = INTEGER(undirected);

    /* extract the components from the prior. */
    prob = VECTOR_ELT(prior, 2);
    p = REAL(prob);

    /* count output arcs. */
    for (i = 0; i < narcs1; i++)
        narcs2 += 2 - und[i];
    narcs2 /= 2;

    /* allocate the columns of the return value. */
    PROTECT(from = allocVector(STRSXP, narcs2));
    PROTECT(to = allocVector(STRSXP, narcs2));
    PROTECT(nid = allocVector(INTSXP, narcs2));
    id = INTEGER(nid);
    PROTECT(dir1 = allocVector(REALSXP, narcs2));
    d1 = REAL(dir1);
    PROTECT(dir2 = allocVector(REALSXP, narcs2));
    d2 = REAL(dir2);

    /* sort the strength coefficients. */
    poset = alloc1dcont(narcs1);
    for (k = 0; k < narcs1; k++)
        poset[k] = k;
    R_qsort_int_I(aid, poset, 1, narcs1);

    for (i = 0, k = 0; i < narcs1; i++) {

        cur = poset[i];

#define ASSIGN(A1, A2, D1, D2) \
  SET_STRING_ELT(from,  k, STRING_ELT(A1, cur)); \
  SET_STRING_ELT(to,  k, STRING_ELT(A2, cur)); \
  id[k] = aid[i]; \
  D1[k] = p[cur]; \
  if ((und[cur] == TRUE) && (i < narcs1 - 1)) \
    D2[k] = p[poset[++i]]; \
  else \
    D2[k] = (1 - D1[k])/2;

        /* copy the node labels. */
        if (m1[cur] < m2[cur]) {

            ASSIGN(a1, a2, d1, d2);

        }/*THEN*/
        else {

            ASSIGN(a2, a1, d2, d1);

        }/*ELSE*/

        if (d1[k] + d2[k] > 1) {

            UNPROTECT(9);

            error("the probabilities for arc %s -> %s sum to %lf.",
                  CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]);

        }/*THEN*/

        /* move to the next arc. */
        k++;

    }/*FOR*/

    /* set up the return value. */
    PROTECT(result = allocVector(VECSXP, 5));
    SET_VECTOR_ELT(result, 0, from);
    SET_VECTOR_ELT(result, 1, to);
    SET_VECTOR_ELT(result, 2, nid);
    SET_VECTOR_ELT(result, 3, dir1);
    SET_VECTOR_ELT(result, 4, dir2);
    PROTECT(colnames = allocVector(STRSXP, 5));
    SET_STRING_ELT(colnames, 0, mkChar("from"));
    SET_STRING_ELT(colnames, 1, mkChar("to"));
    SET_STRING_ELT(colnames, 2, mkChar("aid"));
    SET_STRING_ELT(colnames, 3, mkChar("fwd"));
    SET_STRING_ELT(colnames, 4, mkChar("bkwd"));
    setAttrib(result, R_NamesSymbol, colnames);
    PROTECT(df = minimal_data_frame(result));

    UNPROTECT(12);

    return df;

}/*CASTELO_COMPLETION*/
Exemplo n.º 27
0
/* predict the value of the training variable in a naive Bayes or Tree-Augmented
 * naive Bayes classifier. */
SEXP naivepred(SEXP fitted, SEXP data, SEXP parents, SEXP training, SEXP prior,
    SEXP prob, SEXP debug) {

int i = 0, j = 0, k = 0, n = 0, nvars = LENGTH(fitted), nmax = 0, tr_nlevels = 0;
int *res = NULL, **ex = NULL, *ex_nlevels = NULL;
int idx = 0, *tr_id = INTEGER(training);
int *iscratch = NULL, *maxima = NULL, *prn = NULL, *debuglevel = LOGICAL(debug);
int *include_prob = LOGICAL(prob);
double **cpt = NULL, *pr = NULL, *scratch = NULL, *buf = NULL, *pt = NULL;
double sum = 0;
SEXP class, temp, tr, tr_levels, result, nodes, probtab, dimnames;

  /* cache the node labels. */
  nodes = getAttrib(fitted, R_NamesSymbol);

  /* cache the pointers to all the variables. */
  ex = (int **) alloc1dpointer(nvars);
  ex_nlevels = alloc1dcont(nvars);

  for (i = 0; i < nvars; i++) {

    temp = VECTOR_ELT(data, i);
    ex[i] = INTEGER(temp);
    ex_nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  /* get the training variable and its levels. */
  n = LENGTH(VECTOR_ELT(data, 0));
  tr = getListElement(VECTOR_ELT(fitted, *tr_id - 1), "prob");
  tr_levels = VECTOR_ELT(getAttrib(tr, R_DimNamesSymbol), 0);
  tr_nlevels = LENGTH(tr_levels);
  /* get the prior distribution. */
  pr = REAL(prior);

  if (*debuglevel > 0) {

    Rprintf("* the prior distribution for the target variable is:\n");
    PrintValue(prior);

  }/*THEN*/

  /* allocate the scratch space used to compute posterior probabilities. */
  scratch = alloc1dreal(tr_nlevels);
  buf = alloc1dreal(tr_nlevels);

  /* cache the pointers to the conditional probability tables. */
  cpt = (double **) alloc1dpointer(nvars);

  for (i = 0; i < nvars; i++) 
    cpt[i] = REAL(getListElement(VECTOR_ELT(fitted, i), "prob"));

  /* dereference the parents' vector. */
  prn = INTEGER(parents);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(tr_nlevels);

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(tr_nlevels);

  /* allocate the return value. */
  PROTECT(result = allocVector(INTSXP, n));
  res = INTEGER(result);

  /* allocate and initialize the table of the posterior probabilities. */
  if (*include_prob > 0) {

    PROTECT(probtab = allocMatrix(REALSXP, tr_nlevels, n));
    pt = REAL(probtab);
    memset(pt, '\0', n * tr_nlevels * sizeof(double));

  }/*THEN*/

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* for each observation... */
  for (i = 0; i < n; i++) {

    /* ... reset the scratch space and the indexes array... */
    for (k = 0; k < tr_nlevels; k++) {

      scratch[k] = log(pr[k]);
      iscratch[k] = k + 1;

    }/*FOR*/

    if (*debuglevel > 0)
      Rprintf("* predicting the value of observation %d.\n", i + 1);

    /* ... and for each conditional probability table... */
    for (j = 0; j < nvars; j++) {

      /* ... skip the training variable... */
      if (*tr_id == j + 1)
        continue;

      /* ... (this is the root node of the Chow-Liu tree) ... */
      if (prn[j] == NA_INTEGER) {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d) from the CPT (p = %lf).\n",
              NODE(j), CMC(ex[j][i] - 1, k, ex_nlevels[j]), ex[j][i], k + 1,
              cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

        }/*FOR*/

      }/*THEN*/
      else {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          /* (the first dimension corresponds to the current node [X], the second
           * to the training node [Y], the third to the only parent of the current
           * node [Z]; CMC coordinates are computed as X + Y * NX + Z * NX * NY. */
          idx = (ex[j][i] - 1) + k * ex_nlevels[j] + 
                  (ex[prn[j] - 1][i] - 1) * ex_nlevels[j] * tr_nlevels;

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d, %d) from the CPT (p = %lf).\n",
              NODE(j), idx, ex[j][i], k + 1, ex[prn[j] - 1][i], cpt[j][idx]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][idx]);

        }/*FOR*/

      }/*ELSE*/

    }/*FOR*/

    /* find out the mode(s). */
    all_max(scratch, tr_nlevels, maxima, &nmax, iscratch, buf);

    /* compute the posterior probabilities on the right scale, to attach them
     * to the return value. */
    if (*include_prob) {

      /* copy the log-probabilities from scratch. */
      memcpy(pt + i * tr_nlevels, scratch, tr_nlevels * sizeof(double));

      /* transform log-probabilitiees into plain probabilities. */
      for (k = 0, sum = 0; k < tr_nlevels; k++)
        sum += pt[i * tr_nlevels + k] = exp(pt[i * tr_nlevels + k] - scratch[maxima[0] - 1]);

      /* rescale them to sum up to 1. */
      for (k = 0; k < tr_nlevels; k++)
        pt[i * tr_nlevels + k] /= sum;

    }/*THEN*/

    if (nmax == 1) {

      res[i] = maxima[0];

      if (*debuglevel > 0) {

        Rprintf("  @ prediction for observation %d is '%s' with (log-)posterior:\n",
          i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax, res + i, maxima);

      if (*debuglevel > 0) {

        Rprintf("  @ there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax, i + 1);

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

        Rprintf("  @ tied levels are:");
        for (k = 0; k < nmax; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[k] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* add back the attributes and the class to the return value. */
  PROTECT(class = allocVector(STRSXP, 1));
  SET_STRING_ELT(class, 0, mkChar("factor"));
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, class);

  if (*include_prob > 0) {

    /* set the levels of the taregt variable as rownames. */
    PROTECT(dimnames = allocVector(VECSXP, 2));
    SET_VECTOR_ELT(dimnames, 0, tr_levels);
    setAttrib(probtab, R_DimNamesSymbol, dimnames);
    /* add the posterior probabilities to the return value. */
    setAttrib(result, install("prob"), probtab);

    UNPROTECT(4);

  }/*THEN*/
  else {

    UNPROTECT(2);

  }/*ELSE*/

  return result;

}/*NAIVEPRED*/
Exemplo n.º 28
0
/* unconditional Monte Carlo simulation for discrete tests. */
SEXP mcarlo(SEXP x, SEXP y, SEXP lx, SEXP ly, SEXP length, SEXP samples,
    SEXP test, SEXP alpha) {

double *fact = NULL, *res = NULL, observed = 0;
int *n = NULL, *ncolt = NULL, *nrowt = NULL, *workspace = NULL;
int *num = INTEGER(length), *nr = INTEGER(lx), *nc = INTEGER(ly);
int *xx = INTEGER(x), *yy = INTEGER(y), *B = INTEGER(samples);
int i = 0, k = 0, npermuts = 0, enough = ceil(NUM(alpha) * (*B)) + 1;
SEXP result;

  /* allocate and initialize the result. */
  PROTECT(result = allocVector(REALSXP, 3));
  res = REAL(result);
  res[0] = res[1] = res[2] = 0; // initial test score / p-value / nb permutations

  /* allocate and compute the factorials needed by rcont2. */
  allocfact(*num);

  /* allocate and initialize the workspace for rcont2. */
  workspace = alloc1dcont(*nc);

  /* initialize the contingency table. */
  n = alloc1dcont(*nr * (*nc));

  /* initialize the marginal frequencies. */
  nrowt = alloc1dcont(*nr);
  ncolt = alloc1dcont(*nc);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++)
    n[CMC(xx[k] - 1, yy[k] - 1, *nr)]++;

  /* compute the marginals. */
  for (i = 0; i < *nr; i++)
    for (k = 0; k < *nc; k++) {

      nrowt[i] += n[CMC(i, k, *nr)];
      ncolt[k] += n[CMC(i, k, *nr)];

    }/*FOR*/

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random contingency tables (given row and column totals) and check how many
     tests are greater than the original one.*/
  switch(INT(test)) {

    case MUTUAL_INFORMATION:
      observed = _mi(n, nrowt, ncolt, nr, nc, num);

      for (k = 0; k < *B; k++) {

        rcont2(nr, nc, nrowt, ncolt, num, fact, workspace, n);

        if (_mi(n, nrowt, ncolt, nr, nc, num) > observed) {

          sequential_counter_check(res[1]);

        }/*THEN*/

        npermuts++;

      }/*FOR*/

      observed = 2 * observed;

      break;

    case PEARSON_X2:
      observed = _x2(n, nrowt, ncolt, nr, nc, num);

      for (k = 0; k < *B; k++) {

        rcont2(nr, nc, nrowt, ncolt, num, fact, workspace, n);

        if (_x2(n, nrowt, ncolt, nr, nc, num) > observed) {

          sequential_counter_check(res[1]);

        }/*THEN*/

        npermuts++;

      }/*FOR*/

      break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed value of the statistic, the corresponding
     p-value, and the number of permutations performed. */
  res[0] = observed;
  res[1] /= (*B);
  res[2] = npermuts;

  UNPROTECT(1);

  return result;

}/*MCARLO*/
Exemplo n.º 29
0
/* generate a connected graph with uniform probability, subject to some
 * constraints on the degree of the nodes. */
SEXP ide_cozman_graph(SEXP nodes, SEXP num, SEXP burn_in, SEXP max_in_degree,
    SEXP max_out_degree, SEXP max_degree, SEXP connected, SEXP debug) {

int i = 0, k = 0, nnodes = LENGTH(nodes), *n = INTEGER(num);
int changed = 0, *work = NULL, *arc = NULL, *a = NULL, *burn = INTEGER(burn_in);
int *degree = NULL, *in_degree = NULL, *out_degree = NULL;
int *debuglevel = LOGICAL(debug), *cozman = LOGICAL(connected);
double *max_in = REAL(max_in_degree), *max_out = REAL(max_out_degree),
  *max = REAL(max_degree);
SEXP list, res, args, argnames, amat, arcs, cached, debug2, null, temp;
char *label = (*cozman > 0) ? "ic-dag" : "melancon";

  /* a fake debug argument (set to FALSE) for cache_structure(). */
  PROTECT(debug2 = allocVector(LGLSXP, 1));
  LOGICAL(debug2)[0] = FALSE;

  /* the list of optional arguments. */
  PROTECT(argnames = allocVector(STRSXP, 4));
  SET_STRING_ELT(argnames, 0, mkChar("burn.in"));
  SET_STRING_ELT(argnames, 1, mkChar("max.in.degree"));
  SET_STRING_ELT(argnames, 2, mkChar("max.out.degree"));
  SET_STRING_ELT(argnames, 3, mkChar("max.degree"));

  PROTECT(args = allocVector(VECSXP, 4));
  setAttrib(args, R_NamesSymbol, argnames);
  SET_VECTOR_ELT(args, 0, burn_in);
  SET_VECTOR_ELT(args, 1, max_in_degree);
  SET_VECTOR_ELT(args, 2, max_out_degree);
  SET_VECTOR_ELT(args, 3, max_degree);

  /* allocate and initialize the adjacency matrix. */
  PROTECT(amat = allocMatrix(INTSXP, nnodes, nnodes));
  a = INTEGER(amat);
  memset(a, '\0', nnodes * nnodes * sizeof(int));

  /* initialize a simple ordered tree with n nodes, where all nodes
   * have just one parent, except the first one that does not have
   * any parent. */
  for (i = 1; i < nnodes; i++)
    a[CMC(i - 1, i, nnodes)] = 1;

  /* allocate the arrays needed by SampleNoReplace. */
  arc = alloc1dcont(2);
  work = alloc1dcont(nnodes);

  /* allocate and initialize the degree arrays. */
  degree = alloc1dcont(nnodes);
  in_degree = alloc1dcont(nnodes);
  out_degree = alloc1dcont(nnodes);

  for (i = 0; i < nnodes; i++) {

    in_degree[i] = out_degree[i] = 1;
    degree[i] = 2;

  }/*FOR*/
  in_degree[0] = out_degree[nnodes - 1] = 0;
  degree[0] = degree[nnodes - 1] = 1;

  GetRNGstate();

  /* wait for the markov chain monte carlo simulation to reach stationarity. */
  for (k = 0; k < *burn; k++) {

    if (*debuglevel > 0)
      Rprintf("* current model (%d):\n", k + 1);

    changed = ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree, max_in,
                out_degree, max_out, cozman, debuglevel);

    /* print the model string to allow a sane debugging experience; note that this
     * has a huge impact on performance, so use it with care. */
    if ((*debuglevel > 0) && (changed)) {

      PROTECT(null = allocVector(NILSXP, 1));
      PROTECT(res = bn_base_structure(nodes, args, null, null, 0, "none", label));
      PROTECT(arcs = amat2arcs(amat, nodes));
      PROTECT(cached = cache_structure(nodes, amat, debug2));
      SET_VECTOR_ELT(res, 1, cached);
      SET_VECTOR_ELT(res, 2, arcs);
      print_modelstring(res);
      UNPROTECT(4);

    }/*THEN*/

  }/*FOR*/

#define UPDATE_NODE_CACHE(cur) \
          if (*debuglevel > 0) \
            Rprintf("  > updating cached information about node %s.\n", NODE(cur)); \
          memset(work, '\0', nnodes * sizeof(int)); \
          PROTECT(temp = c_cache_partial_structure(cur, nodes, amat, work, debug2)); \
          SET_VECTOR_ELT(cached, cur, temp); \
          UNPROTECT(1);

  /* return a list if more than one bn is generated. */
  if (*n > 1) {

    if (*debuglevel > 0)
      Rprintf("* end of the burn-in iterations.\n");

    PROTECT(list = allocVector(VECSXP, *n));
    PROTECT(null = allocVector(NILSXP, 1));

    /* generate the "bn" structure, with dummy NULLs for the "arcs" and
     * "nodes" elements (which will be initialized later on). */
    PROTECT(res = bn_base_structure(nodes, args, null, null, 0, "none", label));

    for (k = 0; k < *n; k++) {

      if (*debuglevel > 0)
        Rprintf("* current model (%d):\n", *burn + k + 1);

      changed = ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree,
                  max_in, out_degree, max_out, cozman, debuglevel);

      if (changed || (k == 0)) {

        /* generate the arc set and the cached information from the adjacency
         * matrix. */
        if (k > 0) {

          /* if a complete "bn" object is available, we can retrieve the cached
           * information about the nodes from the structure stored in the last
           * iteration and update only the elements that really need it. */
          temp = VECTOR_ELT(VECTOR_ELT(list, k - 1), 1);
          PROTECT(cached = duplicate(temp));

          /* update the first sampled nodes; both of them gain/lose either
           * a parent or a child.  */
          UPDATE_NODE_CACHE(arc[0] - 1);
          UPDATE_NODE_CACHE(arc[1] - 1);

          /* all the parents of the second sampled node gain/lose a node in
           * the markov blanket (the first sampled node, which shares a child
           * with all of them). */
          for (i = 0; i < nnodes; i++) {

            if ((i != arc[0] - 1) && (a[CMC(i, arc[1] - 1, nnodes)] == 1)) {

              UPDATE_NODE_CACHE(i);

            }/*THEN*/

          }/*FOR*/

        }/*THEN*/
        else {

          PROTECT(cached = cache_structure(nodes, amat, debug2));

        }/*ELSE*/

        PROTECT(arcs = amat2arcs(amat, nodes));
        SET_VECTOR_ELT(res, 1, cached);
        SET_VECTOR_ELT(res, 2, arcs);

        /* print the model string to allow a sane debugging experience. */
        if (*debuglevel > 0)
          print_modelstring(res);

        /* save the structure in the list. */
        PROTECT(temp = duplicate(res));
        SET_VECTOR_ELT(list, k, temp);

        UNPROTECT(3);

      }/*THEN*/
      else {

        /* the adjacency matrix is unchanged; so we can just copy the bayesian
         * network from the previous iteration in the k-th slot of the list. */
        SET_VECTOR_ELT(list, k, VECTOR_ELT(list, k - 1));

      }/*ELSE*/

    }/*FOR*/

    PutRNGstate();

    UNPROTECT(7);
    return list;

  }/*THEN*/
  else {

    if (*debuglevel > 0)
      Rprintf("* end of the burn-in.\n* current model (%d):\n", *burn + 1);

    ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree,
      max_in, out_degree, max_out, cozman, debuglevel);

    /* generate the arc set and the cached information form the adjacency
     * matrix. */
    PROTECT(arcs = amat2arcs(amat, nodes));
    PROTECT(cached = cache_structure(nodes, amat, debug2));

    /* generate the "bn" structure. */
    PROTECT(res = bn_base_structure(nodes, args, arcs, cached, 0, "none", label));

    /* print the model string to allow a sane debugging experience. */
    if (*debuglevel > 0)
      print_modelstring(res);

    PutRNGstate();

    UNPROTECT(7);
    return res;

  }/*ELSE*/

}/*IDE_COZMAN_GRAPH*/
Exemplo n.º 30
0
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncols = LENGTH(data);
int num = LENGTH(VECTOR_ELT(data, i)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, *clevels = NULL, *est = INTEGER(estimator);
int *wl = NULL, *bl = NULL, *poset = NULL, *debuglevel = LOGICAL(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = alloc1dcont(1);
    *clevels = NLEVELS(conditional); 

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = alloc1dreal(UPTRI3_MATRIX(ncols));
  include = allocstatus(UPTRI3_MATRIX(ncols));

  /* compute the pairwise mutual information coefficients. */
  if (*debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncols, nlevels, &num, cond, clevels, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = LENGTH(wlist);

    for (i = 0; i < nwl; i++) {

      if (*debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = LENGTH(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = alloc1dcont(UPTRI3_MATRIX(ncols));
  for (i = 0; i < UPTRI3_MATRIX(ncols); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncols));

  for (i = UPTRI3_MATRIX(ncols) - 1; i > 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncols, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncols - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (*debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, debug_coord[0], debug_coord[1], ncols, nodes, FALSE)) {

      if (*debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (*debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncols - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncols - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncols - 1));

  return arcs;

}/*CHOW_LIU*/