Пример #1
0
/* determine whether a graph is DAG or a PDAG/UG. */
SEXP is_dag(SEXP arcs, SEXP nnodes) {

int i = 0, nrows = length(arcs)/2, n = INT(nnodes);
int *a = INTEGER(arcs);
short int *checklist = NULL;

  /* allocate and initialize the checklist. */
  checklist = allocstatus(UPTRI_MATRIX(n));

  for (i = 0; i < nrows; i++) {

    if (checklist[UPTRI(a[CMC(i, 0, nrows)], a[CMC(i, 1, nrows)], n)] == 0) {

      /* this arc is not present in the checklist; add it. */
      checklist[UPTRI(a[CMC(i, 0, nrows)], a[CMC(i, 1, nrows)], n)] = 1;

    }/*THEN*/
    else {

      /* this arc or its opposite already present in the checklist; the graph
       * has at least an undirected arc, so return FALSE. */
      return ScalarLogical(FALSE);

    }/*THEN*/

  }/*FOR*/

  return ScalarLogical(TRUE);

}/*IS_DAG*/
Пример #2
0
/* get the Markov blanket of a node from a fitted model. */
SEXP fitted_mb(SEXP bn, SEXP target) {

int i = 0, j = 0, target_node = 0, nnodes = 0, counter = 0;
int *matched = NULL;
short int *status = NULL;
SEXP mb, labels, try, temp, node_data;

  /* get the nodes' labels. */
  labels = getAttrib(bn, R_NamesSymbol);
  nnodes = LENGTH(labels);
  /* allocate and initialize a status vector. */
  status = allocstatus(nnodes);

  /* match the label of the target node. */
  PROTECT(try = match(labels, target, 0));
  target_node = INT(try);
  UNPROTECT(1);

  /* mark the target node as such. */
  status[target_node - 1] = TARGET;

  /* match the parents and the children of the target node. */
  node_data = VECTOR_ELT(bn, target_node - 1);
  MATCH_NODES("parents", PARENT);
  MATCH_NODES("children", CHILD);

  /* now match the parents of each child. */
  for (j = 0; j < nnodes; j++) {

    /* this is not a child, go on. */
    if (status[j] != CHILD)
      continue;

      node_data = VECTOR_ELT(bn, j);
      MATCH_NODES("parents", BLANKET);

  }/*FOR*/

  /* a node is not considered part of its own Markov blanket. */
  status[target_node - 1] = 0;

  /* allocate and initialize the result. */
  PROTECT(mb = allocVector(STRSXP, counter));
  for (i = 0, j = 0; i < nnodes; i++)
    if (status[i] != 0)
      SET_STRING_ELT(mb, j++, STRING_ELT(labels, i));

  UNPROTECT(1);

  return mb;

}/*FITTED_MB*/
Пример #3
0
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, 
    SEXP directed, SEXP debug) {

int i = 0, j = 0, z = 0;
int nrows = LENGTH(nodes);
int check_status = nrows, check_status_old = nrows;
int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL;
int *debuglevel = NULL;
short int *status = NULL;
SEXP amat;

  /* dereference the debug parameter. */
  debuglevel = LOGICAL(debug);

  /* build the adjacency matrix from the arc set.  */
  if (*debuglevel > 0)
    Rprintf("* building the adjacency matrix.\n");

  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* should we consider only directed arcs? */
  if (isTRUE(directed)) {

    /* removing undirected arcs, so that only cycles made only of directed
     * arcs will make the function return TRUE. */

    for (i = 0; i < nrows; i++)
      for (j = 0; j < nrows; j++)
        if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1))
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

  }/*THEN*/

  /* initialize the status, {row,col}sums and crossprod arrays. */
  status = allocstatus(nrows);
  rowsums = alloc1dcont(nrows);
  colsums = alloc1dcont(nrows);
  crossprod = alloc1dcont(nrows);

  if (*debuglevel > 0)
    Rprintf("* checking whether the partially directed graph is acyclic.\n");

  /* even in the worst case scenario at least two nodes are marked as
   * good at each iteration, so even ceil(nrows/2) iterations should be
   * enough. */
  for (z = 0; z < nrows; z++) {

start:

    if (*debuglevel > 0)
      Rprintf("* beginning iteration %d.\n", z + 1);

    for (i = 0; i < nrows; i++) {

      /* skip known-good nodes. */
      if (status[i] == GOOD) continue;

      /* reset and update row and column totals. */
      rowsums[i] = colsums[i] = crossprod[i] = 0;

      /* compute row and column totals for the i-th node. */
      for (j = 0; j < nrows; j++) {

        rowsums[i] += a[CMC(i, j, nrows)];
        colsums[i] += a[CMC(j, i, nrows)];
        crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)];

      }/*FOR*/

there:

      if (*debuglevel > 0)
        Rprintf("  > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n",
          NODE(i), rowsums[i], colsums[i], crossprod[i]);

      /* if either total is zero, the node is either a root node or a
       * leaf node, and is not part of any cycle. */
      if (((rowsums[i] == 0) || (colsums[i] == 0)) ||
          ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) {

        if (*debuglevel > 0)
          Rprintf("  @ node %s is cannot be part of a cycle.\n", NODE(i));

        /* update the adjacency matrix and the row/column totals. */
        for (j = 0; j < nrows; j++)
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

        rowsums[i] = colsums[i] = crossprod[i] = 0;

        /* mark the node as good. */
        status[i] = GOOD;
        check_status--;

      }/*THEN*/
      else if (crossprod[i] == 1) {

        /* find the other of the undirected arc. */
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1)
            break;

        /* safety check, just in case. */
        if (i == j) continue;

        if (((colsums[i] == 1) && (colsums[j] == 1)) ||
            ((rowsums[i] == 1) && (rowsums[j] == 1))) {

          if (*debuglevel > 0)
            Rprintf("  @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j));

          /* update the adjacency matrix and the row/column totals. */
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;
          crossprod[i] = 0;
          rowsums[i]--;
          colsums[i]--;
          rowsums[j]--;
          colsums[j]--;

          /* jump back to the first check; if either the row or column total
           * was equal to 1 only because of the undirected arc, the node can
           * now be marked as good. */
          if ((rowsums[i] == 0) || (colsums[i] == 0))
            goto there;

        }/*THEN*/

      }/*THEN*/

    }/*FOR*/

    /* at least three nodes are needed to have a cycle. */
    if (check_status < 3) {

      if (*debuglevel > 0)
        Rprintf("@ at least three nodes are needed to have a cycle.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/

    /* if there are three or more bad nodes and there was no change in
     * the last iteration, the algorithm is stuck on a cycle. */
    if (check_status_old == check_status) {

      if (*debuglevel > 0)
        Rprintf("@ no change in the last iteration.\n");

      /* give up and call c_has_path() to kill some undirected arcs. */
      for (i = 0; i < nrows; i++)
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) {

            /* remove the arc from the adjacency matrix while testing it,
             * there's a path is always found (the arc itself). */
            a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

            if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) &&
               !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) {

              if (*debuglevel > 0)
                Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j));

              /* increase the iteration counter and start again. */
              z++;
              goto start;

            }/*THEN*/
            else {

              /* at least one cycle is really present; give up and return.  */
              UNPROTECT(1);
              return build_return_array(nodes, status, nrows, check_status, return_nodes);

            }/*ELSE*/

          }/*THEN*/

      /* give up if there are no undirected arcs, cycles composed
       * entirely by directed arcs are never false positives. */
      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/
    else {

      check_status_old = check_status;

    }/*ELSE*/

  }/*FOR*/

  UNPROTECT(1);
  return build_return_array(nodes, status, nrows, check_status, return_nodes);

}/*IS_PDAG_ACYCLIC*/
Пример #4
0
/* get root or leaf nodes of the graph. */
SEXP root_nodes(SEXP bn, SEXP leaves) {

short int *status = NULL;
int *get_leaves = INTEGER(leaves);
int i = 0, k = 0, counter = 0;
SEXP temp, temp2, nodes, node_data, labels, result;

  /* get to the nodes' data. */
  nodes = getListElement(bn, "nodes");
  /* this is for "bn.fit" objects. */
  if (isNull(nodes))
    nodes = bn;
  /* get the nodes' labels. */
  labels = getAttrib(nodes, R_NamesSymbol);
  /* allocate and initialize a status vector. */
  status = allocstatus(LENGTH(nodes));

  for (i = 0; i < LENGTH(nodes); i++) {

    /* get the parents/children of this node. */
    node_data = VECTOR_ELT(nodes, i);

    if (*get_leaves == FALSE)
      temp = getListElement(node_data, "parents");
    else
      temp = getListElement(node_data, "children");

    /* this is not a root/leaf node, go on. */
    if (LENGTH(temp) != 0)
      continue;

    /* this takes care of dubious neighbours in "bn" objects. */
    temp = getListElement(node_data, "nbr");

    if (!isNull(temp)) {

      if (*get_leaves == FALSE)
        temp2 = getListElement(node_data, "children");
      else
        temp2 = getListElement(node_data, "parents");

      /* in partially directed graphs not all neighbours can be classified as
       * parents or children due to undirected arcs; return only nodes which
       * are not incident on any undirected arc. */
      if (LENGTH(temp) != LENGTH(temp2))
        continue;

    }/*THEN*/

    /* this is a root/leaf node, all right. */
    status[i] = 1;
    /* increase the counter. */
    counter++;

  }/*FOR*/

  /* allocate and initialize the result. */
  PROTECT(result = allocVector(STRSXP, counter));

  for (i = 0; i < LENGTH(nodes); i++)
    if (status[i] == 1)
      SET_STRING_ELT(result, k++, STRING_ELT(labels, i));

  UNPROTECT(1);

  return result;

}/*ROOT_NODES*/
Пример #5
0
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children,
                     SEXP debug) {

    int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0;
    int nbeta = LENGTH(VECTOR_ELT(beta, 0));
    int *temp = NULL, *debuglevel = LOGICAL(debug), *aid = INTEGER(VECTOR_ELT(beta, 2));
    double prior = 0, result = 0;
    double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3));
    short int *adjacent = NULL;
    SEXP nodes, try;

    /* get the node labels. */
    nodes = getAttrib(beta, install("nodes"));
    nnodes = LENGTH(nodes);

    /* match the target node. */
    PROTECT(try = match(nodes, target, 0));
    t = INT(try);
    UNPROTECT(1);

    /* find out which nodes are parents and which nodes are children. */
    adjacent = allocstatus(nnodes);

    PROTECT(try = match(nodes, parents, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = PARENT;
    UNPROTECT(1);

    PROTECT(try = match(nodes, children, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = CHILD;
    UNPROTECT(1);

    /* prior probabilities table lookup. */
    for (i = t + 1; i <= nnodes; i++) {

        /* compute the arc id. */
        cur_arc = UPTRI3(t, i, nnodes);

        /* look up the prior probability. */
        for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) {

            /* arcs are ordered, so we can stop early in the lookup. */
            if (aid[k] > cur_arc)
                break;

            if (aid[k] == cur_arc) {

                switch(adjacent[i - 1]) {

                case PARENT:
                    prior = bkwd[k];
                    break;
                case CHILD:
                    prior = fwd[k];
                    break;
                default:
                    prior = 1 - bkwd[k] - fwd[k];

                }/*SWITCH*/

                break;

            }/*THEN*/

        }/*FOR*/

        if (*debuglevel > 0) {

            switch(adjacent[i - 1]) {

            case PARENT:
                Rprintf("  > found arc %s -> %s, prior pobability is %lf.\n",
                        NODE(i - 1), NODE(t - 1), prior);
                break;
            case CHILD:
                Rprintf("  > found arc %s -> %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);
                break;
            default:
                Rprintf("  > no arc between %s and %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);

            }/*SWITCH*/

        }/*THEN*/

        /* move to log-scale and divide by the non-informative log(1/3), so that
         * the contribution of each arc whose prior has not been not specified by
         * the user is zero; overflow is likely otherwise. */
        result += log(prior / ((double)1/3));

    }/*FOR*/

    return result;

}/*CASTELO_PRIOR*/

/* complete a prior as per Castelo & Siebes. */
SEXP castelo_completion(SEXP prior, SEXP nodes)  {

    int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = LENGTH(nodes);
    int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL;
    double *d1 = NULL, *d2 = NULL, *p = NULL;
    SEXP df, arc_id, undirected, a1, a2, match1, match2, prob;
    SEXP result, colnames, from, to, nid, dir1, dir2;

    /* compute numeric IDs for the arcs. */
    a1 = VECTOR_ELT(prior, 0);
    a2 = VECTOR_ELT(prior, 1);
    narcs1 = LENGTH(a1);
    PROTECT(match1 = match(nodes, a1, 0));
    PROTECT(match2 = match(nodes, a2, 0));
    m1 = INTEGER(match1);
    m2 = INTEGER(match2);
    PROTECT(arc_id = allocVector(INTSXP, narcs1));
    aid = INTEGER(arc_id);

    c_arc_hash(&narcs1, &nnodes, m1, m2, aid, NULL, TRUE);

    /* duplicates correspond to undirected arcs. */
    PROTECT(undirected = dupe(arc_id));
    und = INTEGER(undirected);

    /* extract the components from the prior. */
    prob = VECTOR_ELT(prior, 2);
    p = REAL(prob);

    /* count output arcs. */
    for (i = 0; i < narcs1; i++)
        narcs2 += 2 - und[i];
    narcs2 /= 2;

    /* allocate the columns of the return value. */
    PROTECT(from = allocVector(STRSXP, narcs2));
    PROTECT(to = allocVector(STRSXP, narcs2));
    PROTECT(nid = allocVector(INTSXP, narcs2));
    id = INTEGER(nid);
    PROTECT(dir1 = allocVector(REALSXP, narcs2));
    d1 = REAL(dir1);
    PROTECT(dir2 = allocVector(REALSXP, narcs2));
    d2 = REAL(dir2);

    /* sort the strength coefficients. */
    poset = alloc1dcont(narcs1);
    for (k = 0; k < narcs1; k++)
        poset[k] = k;
    R_qsort_int_I(aid, poset, 1, narcs1);

    for (i = 0, k = 0; i < narcs1; i++) {

        cur = poset[i];

#define ASSIGN(A1, A2, D1, D2) \
  SET_STRING_ELT(from,  k, STRING_ELT(A1, cur)); \
  SET_STRING_ELT(to,  k, STRING_ELT(A2, cur)); \
  id[k] = aid[i]; \
  D1[k] = p[cur]; \
  if ((und[cur] == TRUE) && (i < narcs1 - 1)) \
    D2[k] = p[poset[++i]]; \
  else \
    D2[k] = (1 - D1[k])/2;

        /* copy the node labels. */
        if (m1[cur] < m2[cur]) {

            ASSIGN(a1, a2, d1, d2);

        }/*THEN*/
        else {

            ASSIGN(a2, a1, d2, d1);

        }/*ELSE*/

        if (d1[k] + d2[k] > 1) {

            UNPROTECT(9);

            error("the probabilities for arc %s -> %s sum to %lf.",
                  CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]);

        }/*THEN*/

        /* move to the next arc. */
        k++;

    }/*FOR*/

    /* set up the return value. */
    PROTECT(result = allocVector(VECSXP, 5));
    SET_VECTOR_ELT(result, 0, from);
    SET_VECTOR_ELT(result, 1, to);
    SET_VECTOR_ELT(result, 2, nid);
    SET_VECTOR_ELT(result, 3, dir1);
    SET_VECTOR_ELT(result, 4, dir2);
    PROTECT(colnames = allocVector(STRSXP, 5));
    SET_STRING_ELT(colnames, 0, mkChar("from"));
    SET_STRING_ELT(colnames, 1, mkChar("to"));
    SET_STRING_ELT(colnames, 2, mkChar("aid"));
    SET_STRING_ELT(colnames, 3, mkChar("fwd"));
    SET_STRING_ELT(colnames, 4, mkChar("bkwd"));
    setAttrib(result, R_NamesSymbol, colnames);
    PROTECT(df = minimal_data_frame(result));

    UNPROTECT(12);

    return df;

}/*CASTELO_COMPLETION*/
Пример #6
0
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncols = LENGTH(data);
int num = LENGTH(VECTOR_ELT(data, i)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, *clevels = NULL, *est = INTEGER(estimator);
int *wl = NULL, *bl = NULL, *poset = NULL, *debuglevel = LOGICAL(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = alloc1dcont(1);
    *clevels = NLEVELS(conditional); 

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = alloc1dreal(UPTRI3_MATRIX(ncols));
  include = allocstatus(UPTRI3_MATRIX(ncols));

  /* compute the pairwise mutual information coefficients. */
  if (*debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncols, nlevels, &num, cond, clevels, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = LENGTH(wlist);

    for (i = 0; i < nwl; i++) {

      if (*debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = LENGTH(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = alloc1dcont(UPTRI3_MATRIX(ncols));
  for (i = 0; i < UPTRI3_MATRIX(ncols); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncols));

  for (i = UPTRI3_MATRIX(ncols) - 1; i > 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncols, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncols - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (*debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, debug_coord[0], debug_coord[1], ncols, nodes, FALSE)) {

      if (*debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (*debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncols - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncols - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncols - 1));

  return arcs;

}/*CHOW_LIU*/
Пример #7
0
/* ARACNE structure learning algorithm. */
SEXP aracne(SEXP data, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP debug) {

int i = 0, j = 0, k = 0, coord = 0, ncols = LENGTH(data);
int num = LENGTH(VECTOR_ELT(data, i)), narcs = ncols * (ncols - 1) / 2;
int *nlevels = NULL, *est = INTEGER(estimator), *wl = NULL, *bl = NULL;
int *debuglevel = LOGICAL(debug);
void **columns = NULL;
short int *exclude = NULL;
double *mim = NULL;
SEXP arcs, nodes, wlist, blist;

  nodes = getAttrib(data, R_NamesSymbol);

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* allocate the mutual information matrix and the status vector. */
  mim = alloc1dreal(UPTRI3_MATRIX(ncols));
  exclude = allocstatus(UPTRI3_MATRIX(ncols));

  /* compute the pairwise mutual information coefficients. */
  if (*debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncols, nlevels, &num, NULL, NULL, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* compare all the triplets. */
  for (i = 0; i < ncols; i++) {

    for (j = i + 1; j < ncols; j++) {

      for (k = 0; k < ncols; k++) {

        if ((k == i) || (k == j))
          continue;

        /* cache the UPTRI3 coordinates of the arc. */
        coord = UPTRI3(i + 1, j + 1, ncols);

        /* if MI(X, Y) < min(MI(X, Z), MI(Z, Y)) drop arc X - Y. */
        if ((mim[coord] < mim[UPTRI3(i + 1, k + 1, ncols)]) &&
            (mim[coord] < mim[UPTRI3(j + 1, k + 1, ncols)])) {

          if (*debuglevel > 0) {

            Rprintf("* dropping arc %s - %s because of %s, %lf < min(%lf, %lf)\n",
              NODE(i), NODE(j), NODE(k), mim[UPTRI3(i + 1, j + 1, ncols)],
              mim[UPTRI3(i + 1, k + 1, ncols)], mim[UPTRI3(j + 1, k + 1, ncols)]);

          }/*THEN*/

          /* update the status vector. */
          exclude[coord] = 1;
          /* decrement the number of arcs. */
          narcs--;

          break;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* add back whitelisted arcs. */
  if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);

    for (i = 0; i < LENGTH(wlist); i++) {

      if (*debuglevel > 0) {

        Rprintf("* adding back whitelisted arcs.\n");

        if (exclude[wl[i]] == 1) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[wl[i]] == 1)
        narcs++;
      /* include the arc in the graph. */
      exclude[wl[i]] = 0;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* remove blacklisted arcs. */
  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);

    for (i = 0; i < LENGTH(blist); i++) {

      if (*debuglevel > 0) {

        Rprintf("* removing blacklisted arcs.\n");

        if (exclude[bl[i]] == 0) {

          Rprintf("  > arc %s - %s has been dropped from the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was not present in the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[bl[i]] == 0)
        narcs--;
      /* remove the arc from the graph. */
      exclude[bl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  CONVERT_TO_ARC_SET(exclude, 1, 2 * narcs);

  return arcs;

}/*ARACNE*/
Пример #8
0
SEXP is_dag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, SEXP debug) {

SEXP amat;
int i = 0, j = 0, z = 0;
int nrows = LENGTH(nodes);
int check_status = nrows;
int check_status_old = nrows;
int rowsums, colsums;
int *a = NULL, *debuglevel = NULL;
short int *status = NULL;

  /* dereference the debug parameter. */
  debuglevel = LOGICAL(debug);

  /* build the adjacency matrix from the arc set.  */
  if (*debuglevel > 0)
    Rprintf("* building the adjacency matrix.\n");

  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* allocate and initialize the status array. */
  status = allocstatus(nrows);

  if (*debuglevel > 0)
    Rprintf("* checking whether the directed graph is acyclic.\n");

  /* even in the worst case scenario at least two nodes are marked as
   * good at each iteration, so even ceil(nrows/2) iterations should be
   * enough. */
  for (z = 0; z < nrows; z++) {

    if (*debuglevel > 0)
      Rprintf("* beginning iteration %d.\n", z + 1);

    for (i = 0; i < nrows; i++) {

      /* skip known-good nodes. */
      if (status[i] == GOOD) continue;

      /* reset and update row and column totals. */
      rowsums = colsums = 0;

      /* compute row and column totals for the i-th node. */
      for (j = 0; j < nrows; j++) {

        rowsums += a[CMC(i, j, nrows)];
        colsums += a[CMC(j, i, nrows)];

      }/*FOR*/

      if (*debuglevel > 0)
        Rprintf("  > checking node %s (%d child(ren), %d parent(s)).\n",
          NODE(i), rowsums, colsums);

      /* if either total is zero, the node is either a root node or a
       * leaf node, and is not part of any cycle. */
      if ((rowsums == 0) || (colsums == 0)) {

        if (*debuglevel > 0)
          Rprintf("  @ node %s is cannot be part of a cycle.\n", NODE(i));

        for (j = 0; j < nrows; j++)
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

        /* mark the node as good. */
        status[i] = GOOD;
        check_status--;

      }/*THEN*/

    }/*FOR*/

    /* at least three nodes are needed to have a cycle. */
    if (check_status < 3) {

      if (*debuglevel > 0)
        Rprintf("@ at least three nodes are needed to have a cycle.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/

    /* if there are three or more bad nodes and there was no change in
     * the last iteration, the algorithm is stuck on a cycle. */
    if (check_status_old == check_status) {

      if (*debuglevel > 0)
        Rprintf("@ no change in the last iteration.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/
    else
      check_status_old = check_status;

  }/*FOR*/

  UNPROTECT(1);
  return build_return_array(nodes, status, nrows, check_status, return_nodes);

}/*IS_DAG_ACYCLIC*/
Пример #9
0
/* check neighbourhood sets and markov blankets for consistency.. */
SEXP bn_recovery(SEXP bn, SEXP strict, SEXP mb, SEXP debug, SEXP filter) {

int i = 0, j = 0, k = 0, n = 0, counter = 0;
short int *checklist = NULL, err = 0;
int *debuglevel = NULL, *checkmb = NULL, *nbrfilter = NULL;
SEXP temp, temp2, nodes, elnames = NULL, fixed;

  /* get the names of the nodes. */
  nodes = getAttrib(bn, R_NamesSymbol);
  n = LENGTH(nodes);

  /* allocate and initialize the checklist. */
  checklist = allocstatus(UPTRI_MATRIX(n));

  /* dereference the debug, mb and filter parameters. */
  debuglevel = LOGICAL(debug);
  checkmb = LOGICAL(mb);
  nbrfilter = INTEGER(filter);

  if (*debuglevel > 0) {

    Rprintf("----------------------------------------------------------------\n");

    if (*checkmb)
      Rprintf("* checking consistency of markov blankets.\n");
    else
      Rprintf("* checking consistency of neighbourhood sets.\n");

   }/*THEN*/

  /* scan the structure to determine the number of arcs.  */
  for (i = 0; i < n; i++) {

     if (*debuglevel > 0)
       Rprintf("  > checking node %s.\n",  NODE(i));

    /* get the entry for the (neighbours|elements of the markov blanket)
       of the node.*/
    temp = getListElement(bn, (char *)NODE(i));
    if (!(*checkmb))
      temp = getListElement(temp, "nbr");

    /* check each element of the array and identify which variable it
       corresponds to. */
    for (j = 0; j < LENGTH(temp); j++) {

      for (k = 0; k < n; k++) {

        /* increment the right element of checklist. */
        if (!strcmp(NODE(k), (char *)CHAR(STRING_ELT(temp, j))))
          checklist[UPTRI(i + 1, k + 1, n)]++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* if A is a neighbour of B, B is a neighbour of A; therefore each entry in
   * the checklist array must be equal to either zero (if the corresponding
   * nodes are not neighbours) or two (if the corresponding nodes are neighbours).
   * Any other value (typically one) is caused by an incorrect (i.e. asymmetric)
   * neighbourhood structure. The same logic holds for the markov blankets. */
  for (i = 0; i < n; i++)
    for (j = i; j < n; j++) {

      if ((checklist[UPTRI(i + 1, j + 1, n)] != 0) &&
          (checklist[UPTRI(i + 1, j + 1, n)] != 2)) {

        if (*debuglevel > 0) {

          if (*checkmb)
            Rprintf("@ asymmetry in the markov blankets for %s and %s.\n",
              NODE(i), NODE(j));
          else
            Rprintf("@ asymmetry in the neighbourhood sets for %s and %s.\n",
              NODE(i), NODE(j));

        }/*THEN*/

        err = 1;

      }/*THEN*/

    }/*FOR*/

  /* no need to go on if the (neighbourhood sets|markov blankets) are symmetric;
   * otherwise throw either an error or a warning according to the value of the
   * strict parameter. */
  if (!err) {

    return bn;

  }/*THEN*/
  else if (isTRUE(strict)) {

    if (*checkmb)
      error("markov blankets are not symmetric.\n");
    else
      error("neighbourhood sets are not symmetric.\n");

  }/*THEN*/
  else {

    if (*checkmb)
      warning("markov blankets are not symmetric.\n");
    else
      warning("neighbourhood sets are not symmetric.\n");

  }/*ELSE*/

  /* build a correct structure to return. */
  PROTECT(fixed = allocVector(VECSXP, n));
  setAttrib(fixed, R_NamesSymbol, nodes);

  if (!(*checkmb)) {

    /* allocate colnames. */
    PROTECT(elnames = allocVector(STRSXP, 2));
    SET_STRING_ELT(elnames, 0, mkChar("mb"));
    SET_STRING_ELT(elnames, 1, mkChar("nbr"));

  }/*THEN*/

  for (i = 0; i < n; i++) {

    if (!(*checkmb)) {

      /* allocate the "mb" and "nbr" elements of the node. */
      PROTECT(temp = allocVector(VECSXP, 2));
      SET_VECTOR_ELT(fixed, i, temp);
      setAttrib(temp, R_NamesSymbol, elnames);

      /* copy the "mb" part from the old structure. */
      temp2 = getListElement(bn, (char *)NODE(i));
      temp2 = getListElement(temp2, "mb");
      SET_VECTOR_ELT(temp, 0, temp2);

    }/*THEN*/

    /* fix the neighbourhoods with an AND or an OR filter.
     * AND means that both neighbours have to see each other
     * to be neighbours, OR means that one neighbour at least
     * as to see the other one for both to be neighbours. */
    switch(*nbrfilter) {

      case AND_FILTER:

        /* rescan the checklist. */
        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] == 2)
            if (i != j)
              counter++;

        /* allocate and fill the "nbr" element. */
        PROTECT(temp2 = allocVector(STRSXP, counter));

        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] == 2)
            if (i != j)
              SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j));

        break;

      case OR_FILTER:

        /* rescan the checklist. */
        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] >= 1)
            if (i != j)
              counter++;

        /* allocate and fill the "nbr" element. */
        PROTECT(temp2 = allocVector(STRSXP, counter));

        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] >= 1)
            if (i != j)
              SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j));

        break;
    }

    if (*checkmb) {

      SET_VECTOR_ELT(fixed, i, temp2);
      UNPROTECT(1);

    }/*THEN*/
    else {

      SET_VECTOR_ELT(temp, 1, temp2);
      UNPROTECT(2);

    }/*ELSE*/

  }/*FOR*/

  if (*checkmb)
    UNPROTECT(1);
  else
    UNPROTECT(2);

return fixed;

}/*BN_RECOVERY*/