Пример #1
0
SEXP count_observed_values(SEXP data) {

int i = 0, j = 0, ncol = length(data), nrow = length(VECTOR_ELT(data, 0));
int *rr = NULL, *cc = NULL, *temp_integer = NULL;
double *temp_real = NULL;
SEXP counts, rows, cols, temp;

  PROTECT(counts = allocVector(VECSXP, 2));
  setAttrib(counts, R_NamesSymbol, mkStringVec(2, "rows", "columns"));
  PROTECT(rows = allocVector(INTSXP, nrow));
  PROTECT(cols = allocVector(INTSXP, ncol));
  setAttrib(cols, R_NamesSymbol, getAttrib(data, R_NamesSymbol));
  SET_VECTOR_ELT(counts, 0, rows);
  SET_VECTOR_ELT(counts, 1, cols);
  rr = INTEGER(rows);
  cc = INTEGER(cols);
  memset(rr, '\0', nrow * sizeof(int));
  memset(cc, '\0', ncol * sizeof(int));

  for (j = 0; j < ncol; j++) {

    temp = VECTOR_ELT(data, j);

    switch(TYPEOF(temp)) {

       case REALSXP:

         temp_real = REAL(temp);
         for (i = 0; i < nrow; i++) {

           rr[i] += !ISNAN(temp_real[i]);
           cc[j] += !ISNAN(temp_real[i]);

         }/*FOR*/

         break;

       case INTSXP:

         temp_integer = INTEGER(temp);
         for (i = 0; i < nrow; i++) {

           rr[i] += (temp_integer[i] != NA_INTEGER);
           cc[j] += (temp_integer[i] != NA_INTEGER);

         }/*FOR*/

         break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(3);

  return counts;

}/*COUNT_OBSERVED_VALUES*/
Пример #2
0
/* convert an arc set to a weighted edge list. */
SEXP arcs2welist(SEXP arcs, SEXP nodes, SEXP weights, SEXP nid, SEXP sublist,
    SEXP parents) {

int i = 0, j = 0, k = 0, nnodes = length(nodes), narcs = length(arcs)/2;
int *e = NULL, *coords = NULL, *adjacent = NULL;
int num_id = isTRUE(nid), sub = isTRUE(sublist), up = isTRUE(parents);
double *w = REAL(weights), *ew = NULL;
SEXP try, elist, edges, edge_weights, temp, temp_name = R_NilValue;

  /* allocate the return value. */
  PROTECT(elist = allocVector(VECSXP, nnodes));
  /* set the node names. */
  setAttrib(elist, R_NamesSymbol, nodes);

  /* allocate and initialize the subset name. */
  if (sub > 0)
    PROTECT(temp_name = mkStringVec(2, "edges", "weight"));

  /* allocate the scratch space to keep track adjacent nodes. */
  adjacent = alloc1dcont(nnodes);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  for (i = 0; i < narcs; i++)
    adjacent[coords[i + up * narcs] - 1]++;

  for (i = 0; i < nnodes; i++) {

    /* allocate and set up the edge array. */
    if (num_id > 0) {

      PROTECT(edges = allocVector(INTSXP, adjacent[i]));
      e = INTEGER(edges);

    }/*THEN*/
    else {

      PROTECT(edges = allocVector(STRSXP, adjacent[i]));

    }/*ELSE*/

    /* allocate and set up the weights array. */
    PROTECT(edge_weights = allocVector(REALSXP, adjacent[i]));
    ew = REAL(edge_weights);

    /* copy the coordinates or the labels of adjacent nodes. */
    for (j = 0, k = 0; j < narcs; j++) {

      if (coords[j + up * narcs] != i + 1)
        continue;

      /* copy the weight as well. */
      ew[k] = w[j];

      if (num_id > 0)
        e[k++] = coords[(1 - up) * narcs + j];
      else
        SET_STRING_ELT(edges, k++, STRING_ELT(arcs, (1 - up) * narcs + j));

      if (k == adjacent[i])
        break;

    }/*FOR*/

    if (sub > 0) {

      /* allocate and set up the "edge" sublist for graphNEL. */
      PROTECT(temp = allocVector(VECSXP, 2));
      setAttrib(temp, R_NamesSymbol, temp_name);
      SET_VECTOR_ELT(temp, 0, edges);
      SET_VECTOR_ELT(temp, 1, edge_weights);
      SET_VECTOR_ELT(elist, i, temp);
      UNPROTECT(1);

    }/*THEN*/
    else {

      /* save weights with edges as names. */
      setAttrib(edge_weights, R_NamesSymbol, edges);
      SET_VECTOR_ELT(elist, i, edge_weights);

    }/*ELSE*/

    UNPROTECT(2);

  }/*FOR*/

  if (sub > 0)
    UNPROTECT(3);
  else
    UNPROTECT(2);

  return elist;

}/*ARCS2WELIST*/
Пример #3
0
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */
SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference,
    SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) {

int nnodes = length(nodes), i = 0, j = 0;
int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug);
int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL;
double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL;
double *mp = REAL(maxp), *np = REAL(nparents);
SEXP bestop;

  /* allocate and initialize the return value (use FALSE as a canary value). */
  PROTECT(bestop = allocVector(VECSXP, 3));
  setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to"));

  /* allocate and initialize a dummy FALSE object. */
  SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE));

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(nnodes, sizeof(int));
  scratch = Calloc1D(nnodes, sizeof(int));

  /* save pointers to the numeric/integer matrices. */
  cache_value = REAL(cache);
  ad = INTEGER(added);
  am = INTEGER(amat);
  w = INTEGER(wlmat);
  b = INTEGER(blmat);

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0; i < nnodes * nnodes; i++)
       counter += ad[i];

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to add one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (ad[CMC(i, j, nnodes)] == 0)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to add %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      /* this score delta is the best one at the moment, so add the arc if it
       * does not introduce cycles in the graph. */
      if (temp - max > tol) {

        if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not adding, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ adding %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "set", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes * nnodes; i++)
       counter += am[i] * (1 - w[i]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to remove one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* whitelisted arcs are not to be removed, ever. */
      if (w[CMC(i, j, nnodes)] == 1)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to remove %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (debuglevel > 0)
          Rprintf("    @ removing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "drop", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes; i++)
       for (j = 0; j < nnodes; j++)
         counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to reverse one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* don't reverse an arc if the one in the opposite direction is
       * blacklisted, ever. */
      if (b[CMC(j, i, nnodes)] == 1)
        continue;

      /* do not reverse an arc if that means violating the limit on the
       * maximum number of parents. */
      if (np[i] >= *mp)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)];
      /* nuke small values and negative zeroes. */
      if (fabs(temp) < tol) temp = 0;

      if (debuglevel > 0) {

        Rprintf("  > trying to reverse %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not reversing, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ reversing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "reverse", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;
        /* both nodes' reference scores must be updated. */
        update = 2;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* update the reference scores. */
  REAL(reference)[to] += cache_value[CMC(from, to, nnodes)];
  if (update == 2)
    REAL(reference)[from] += cache_value[CMC(to, from, nnodes)];

  Free1D(path);
  Free1D(scratch);

  UNPROTECT(1);

  return bestop;

}/*HC_OPT_STEP*/
/* convert a set of neighbourhoods into an arc set. */
SEXP nbr2arcs(SEXP nbr) {

int i = 0, j = 0, k = 0, narcs = 0;
int length_names = 0;
SEXP arcs, temp, names;

  /* get the names of the nodes. */
  names = getAttrib(nbr, R_NamesSymbol);
  length_names = length(names);

  /* scan the structure to determine the number of arcs.  */
  for (i = 0; i < length_names; i++) {

    /* get the entry for the neighbours of the node.*/
    temp = getListElement(nbr, (char *)CHAR(STRING_ELT(names, i)));
    temp = getListElement(temp, "nbr");

    narcs += length(temp);

  }/*FOR*/

  /* if there are no arcs, return an empty arc set. */
  if (narcs == 0) {

    /* allocate an empty arc set. */
    PROTECT(arcs = allocMatrix(STRSXP, 0, 2));
    /* set the column names. */
    setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to"));

    UNPROTECT(1);

    return arcs;

  }/*THEN*/
  else {

    /* allocate the arc set. */
    PROTECT(arcs = allocMatrix(STRSXP, narcs, 2));
    /* set the column names. */
    setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to"));

  }/*ELSE*/

  /* rescan the structure to build the arc set. */
  for (i = 0; i < length_names; i++) {

    /* get the entry for the neighbours of the node.*/
    temp = getListElement(nbr, (char *)CHAR(STRING_ELT(names, i)));
    temp = getListElement(temp, "nbr");

    for (j = 0; j < length(temp); j++) {

      SET_STRING_ELT(arcs, k, STRING_ELT(names, i));
      SET_STRING_ELT(arcs, k + 1 * narcs , STRING_ELT(temp, j));
      k++;

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(1);

  return arcs;

}/*NBR2ARCS*/
Пример #5
0
/* C-level interface to unique_arcs. */
SEXP c_unique_arcs(SEXP arcs, SEXP nodes, int warnlevel) {

int i = 0, k = 0, nrow = 0, uniq_rows = 0;
int *checklist = NULL;
SEXP result, try, dup;

  /* the arc set is empty, nothing to do. */
  if (length(arcs) == 0)
    return arcs;

  /* there really is a non-empty arc set, process it. */
  nrow = length(arcs)/2;

  /* match the node labels in the arc set. */
  PROTECT(try = arc_hash(arcs, nodes, FALSE, FALSE));
  /* check which are duplicated. */
  PROTECT(dup = duplicated(try, FALSE));
  checklist = INTEGER(dup);

  /* count how many are not. */
  for (i = 0; i < nrow; i++)
    if (checklist[i] == 0)
      uniq_rows++;

  /* if there is no duplicate arc simply return the original arc set. */
  if (uniq_rows == nrow) {

    UNPROTECT(2);
    return arcs;

  }/*THEN*/
  else {

    /* warn the user if told to do so. */
    if (warnlevel > 0)
      warning("removed %d duplicate arcs.", nrow - uniq_rows);

    /* allocate and initialize the return value. */
    PROTECT(result = allocMatrix(STRSXP, uniq_rows, 2));

    /* store the correct arcs in the return value. */
    for (i = 0, k = 0; i < nrow; i++) {

      if (checklist[i] == 0) {

        SET_STRING_ELT(result, k, STRING_ELT(arcs, i));
        SET_STRING_ELT(result, k + uniq_rows, STRING_ELT(arcs, i + nrow));
        k++;

      }/*THEN*/

    }/*FOR*/

  }/*ELSE*/

  /* allocate, initialize and set the column names. */
  setDimNames(result, R_NilValue, mkStringVec(2, "from", "to"));

  UNPROTECT(3);

  return result;

}/*C_UNIQUE_ARCS*/
Пример #6
0
/* return the skeleton of a DAG/PDAG. */
SEXP dag2ug(SEXP bn, SEXP moral, SEXP debug) {

int i = 0, j = 0, k = 0, nnodes = 0, narcs = 0, row = 0;
int debuglevel = isTRUE(debug), *moralize = LOGICAL(moral);
int *nparents = NULL, *nnbr = NULL;
SEXP node_data, current, nodes, result, temp;

  /* get the nodes' data. */
  node_data = getListElement(bn, "nodes");
  nnodes = length(node_data);
  PROTECT(nodes = getAttrib(node_data, R_NamesSymbol));

  /* allocate and initialize parents' and neighbours' counters. */
  nnbr = Calloc1D(nnodes, sizeof(int));
  if (*moralize > 0)
    nparents = Calloc1D(nnodes, sizeof(int));

  /* first pass: count neighbours, parents and resulting arcs. */
  for (i = 0; i < nnodes; i++) {

    /* get the number of neighbours.  */
    current = VECTOR_ELT(node_data, i);
    nnbr[i] = length(getListElement(current, "nbr"));

    /* update the number of arcs to be returned. */
    if (*moralize > 0) {

      /* get also the number of parents, needed to account for the arcs added
       * for their moralization. */
      nparents[i] = length(getListElement(current, "parents"));
      narcs += nnbr[i] + nparents[i] * (nparents[i] - 1);

    }/*THEN*/
    else {

      narcs += nnbr[i];

    }/*ELSE*/

    if (debuglevel > 0)  {

      if (*moralize > 0) {

        Rprintf("* scanning node %s, found %d neighbours and %d parents.\n",
          NODE(i), nnbr[i], nparents[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n",
          nnbr[i] + nparents[i] * (nparents[i] - 1), narcs);

      }/*THEN*/
      else {

        Rprintf("* scanning node %s, found %d neighbours.\n", NODE(i), nnbr[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n", nnbr[i], narcs);

      }/*ELSE*/

    }/*THEN*/

  }/*FOR*/

  /* allocate the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs, 2));
  /* allocate and set the column names. */
  setDimNames(result, R_NilValue, mkStringVec(2, "from", "to"));

  /* second pass: fill the return value. */
  for (i = 0; i < nnodes; i++) {

    /* get to the current node. */
    current = VECTOR_ELT(node_data, i);
    /* get the neighbours. */
    temp = getListElement(current, "nbr");

    for (j = 0; j < nnbr[i]; j++) {

      SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(nodes, i));
      SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
      row++;

    }/*FOR*/

    /* if we are not creating a moral graph we are done with this node. */
    if (*moralize == 0)
      continue;

    /* get the parents. */
    temp = getListElement(current, "parents");

    for (j = 0; j < nparents[i]; j++) {

      for (k = j+1; k < nparents[i]; k++) {

        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, k));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
        row++;
        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, j));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, k));
        row++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  Free1D(nnbr);

  if (*moralize > 0) {

    /* be really sure not to return duplicate arcs in moral graphs when
     * shielded parents are present (the "shielding" nodes are counted
     * twice). */
    result = c_unique_arcs(result, nodes, FALSE);

    Free1D(nparents);

  }/*THEN*/

  UNPROTECT(2);

  return result;

}/*DAG2UG*/
Пример #7
0
/* arc strength (confidence) and direction coefficients. */
SEXP bootstrap_arc_coefficients(SEXP prob, SEXP nodes) {

int i = 0, j = 0, k = 0, narcs = 0, nnodes = length(nodes);
double *p = NULL, *s = NULL, *d = NULL, tol = MACHINE_TOL;;
SEXP res, rownames, from, to, str, dir;

  /* compute the dimension of the arcs set. */
  narcs = nnodes * (nnodes - 1);

  /* allocate and initialize the various columns. */
  PROTECT(from = allocVector(STRSXP, narcs));
  PROTECT(to = allocVector(STRSXP, narcs));
  PROTECT(str = allocVector(REALSXP, narcs));
  PROTECT(dir = allocVector(REALSXP, narcs));

  /* dereference the probability matrix and the coefficients once and
   * for all. */
  p = REAL(prob);
  s = REAL(str);
  d = REAL(dir);

  /* fill in the coefficients. */
  for (i = 0, k = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* "from" must differ from "to". */
      if (i == j)
        continue;

      /* set the labels of the incident nodes. */
      SET_STRING_ELT(from, k, STRING_ELT(nodes, i));
      SET_STRING_ELT(to, k, STRING_ELT(nodes, j));
      /* compute arc strength and direction confidence. */
      s[k] = p[CMC(i, j, nnodes)] + p[CMC(j, i, nnodes)];
      d[k] = (s[k] == 0 ? 0 : p[CMC(i, j, nnodes)] / s[k]);
      /* sanitize out-of-boundary values arising from floating point errors. */
      s[k] = (s[k] < tol) ? 0 : s[k];
      s[k] = (s[k] > 1 - tol) ? 1 : s[k];
      d[k] = (d[k] < tol) ? 0 : d[k];
      d[k] = (d[k] > 1 - tol) ? 1 : d[k];

      /* increment the arc counter. */
      k++;

    }/*FOR*/

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(res = allocVector(VECSXP, 4));

  /* allocate, initialize and set the class name. */
  setAttrib(res, R_ClassSymbol, mkString("data.frame"));

  /* allocate, initialize and set row names. */
  PROTECT(rownames = allocVector(INTSXP, narcs));
  for (i = 0; i < narcs; i++)
    INTEGER(rownames)[i] = i + 1;
  setAttrib(res, R_RowNamesSymbol, rownames);

  /* set column names. */
  setAttrib(res, R_NamesSymbol,
    mkStringVec(4, "from", "to", "strength", "direction"));

  /* attach the four columns. */
  SET_VECTOR_ELT(res, 0, from);
  SET_VECTOR_ELT(res, 1, to);
  SET_VECTOR_ELT(res, 2, str);
  SET_VECTOR_ELT(res, 3, dir);

  UNPROTECT(6);
  return res;

}/*BOOTSTRAP_ARC_COEFFICIENTS*/
Пример #8
0
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children,
    int debuglevel) {

int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0;
int nbeta = length(VECTOR_ELT(beta, 0));
int *temp = NULL, *aid = INTEGER(VECTOR_ELT(beta, 2));
double prior = 0, result = 0;
double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3));
short int *adjacent = NULL;
SEXP nodes, try;

  /* get the node labels. */
  nodes = getAttrib(beta, BN_NodesSymbol);
  nnodes = length(nodes);

  /* match the target node. */
  PROTECT(try = match(nodes, target, 0));
  t = INT(try);
  UNPROTECT(1);

  /* find out which nodes are parents and which nodes are children. */
  adjacent = allocstatus(nnodes);

  PROTECT(try = match(nodes, parents, 0));
  temp = INTEGER(try);
  for (i = 0; i < length(try); i++)
    adjacent[temp[i] - 1] = PARENT;
  UNPROTECT(1);

  PROTECT(try = match(nodes, children, 0));
  temp = INTEGER(try);
  for (i = 0; i < length(try); i++)
    adjacent[temp[i] - 1] = CHILD;
  UNPROTECT(1);

  /* prior probabilities table lookup. */
  for (i = t + 1; i <= nnodes; i++) {

    /* compute the arc id. */
    cur_arc = UPTRI3(t, i, nnodes);

    /* look up the prior probability. */
    for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) {

      /* arcs are ordered, so we can stop early in the lookup. */
      if (aid[k] > cur_arc)
        break;

      if (aid[k] == cur_arc) {

        switch(adjacent[i - 1]) {

          case PARENT:
            prior = bkwd[k];
            break;
          case CHILD:
            prior = fwd[k];
            break;
          default:
            prior = fmax2(0, 1 - bkwd[k] - fwd[k]);

        }/*SWITCH*/

        break;

      }/*THEN*/

    }/*FOR*/

    if (debuglevel > 0) {

      switch(adjacent[i - 1]) {

        case PARENT:
          Rprintf("  > found arc %s -> %s, prior pobability is %lf.\n",
            NODE(i - 1), NODE(t - 1), prior);
          break;
        case CHILD:
          Rprintf("  > found arc %s -> %s, prior probability is %lf.\n",
            NODE(t - 1), NODE(i - 1), prior);
          break;
        default:
          Rprintf("  > no arc between %s and %s, prior probability is %lf.\n",
            NODE(t - 1), NODE(i - 1), prior);

      }/*SWITCH*/

    }/*THEN*/

    /* move to log-scale and divide by the non-informative log(1/3), so that
     * the contribution of each arc whose prior has not been specified by the
     * user is zero; overflow is likely otherwise. */
    result += log(prior / ((double)1/3));

  }/*FOR*/

  return result;

}/*CASTELO_PRIOR*/

/* complete a prior as per Castelo & Siebes. */
SEXP castelo_completion(SEXP prior, SEXP nodes, SEXP learning)  {

int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = length(nodes);
int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL;
double *d1 = NULL, *d2 = NULL, *p = NULL;
SEXP df, arc_id, undirected, a1, a2, match1, match2, prob;
SEXP result, from, to, nid, dir1, dir2;

  /* compute numeric IDs for the arcs. */
  a1 = VECTOR_ELT(prior, 0);
  a2 = VECTOR_ELT(prior, 1);
  narcs1 = length(a1);
  PROTECT(match1 = match(nodes, a1, 0));
  PROTECT(match2 = match(nodes, a2, 0));
  m1 = INTEGER(match1);
  m2 = INTEGER(match2);
  PROTECT(arc_id = allocVector(INTSXP, narcs1));
  aid = INTEGER(arc_id);

  c_arc_hash(narcs1, nnodes, m1, m2, aid, NULL, FALSE);

  /* duplicates correspond to undirected arcs. */
  PROTECT(undirected = dupe(arc_id));
  und = INTEGER(undirected);

  /* extract the components from the prior. */
  prob = VECTOR_ELT(prior, 2);
  p = REAL(prob);

  /* count output arcs. */
  for (i = 0; i < narcs1; i++)
    narcs2 += 2 - und[i];
  narcs2 /= 2;

  /* allocate the columns of the return value. */
  PROTECT(from = allocVector(STRSXP, narcs2));
  PROTECT(to = allocVector(STRSXP, narcs2));
  PROTECT(nid = allocVector(INTSXP, narcs2));
  id = INTEGER(nid);
  PROTECT(dir1 = allocVector(REALSXP, narcs2));
  d1 = REAL(dir1);
  PROTECT(dir2 = allocVector(REALSXP, narcs2));
  d2 = REAL(dir2);

  /* sort the strength coefficients. */
  poset = alloc1dcont(narcs1);
  for (k = 0; k < narcs1; k++)
    poset[k] = k;
  R_qsort_int_I(aid, poset, 1, narcs1);

  for (i = 0, k = 0; i < narcs1; i++) {

    cur = poset[i];

#define ASSIGN(A1, A2, D1, D2) \
  SET_STRING_ELT(from,  k, STRING_ELT(A1, cur)); \
  SET_STRING_ELT(to,  k, STRING_ELT(A2, cur)); \
  id[k] = aid[i]; \
  D1[k] = p[cur]; \
  if ((und[cur] == TRUE) && (i < narcs1 - 1)) \
    D2[k] = p[poset[++i]]; \
  else \
    D2[k] = (1 - D1[k])/2;

    /* copy the node labels. */
    if (m1[cur] < m2[cur]) {

      ASSIGN(a1, a2, d1, d2);

    }/*THEN*/
    else {

      ASSIGN(a2, a1, d2, d1);

    }/*ELSE*/

    /* check the probabilities do not exceed 1; fail only for large errors. */
    if (d1[k] + d2[k] > 1) {

      if (d1[k] + d2[k] < 1 + 2 * MACHINE_TOL) {

        d1[k] = d1[k] / (d1[k] + d2[k]);
        d2[k] = d2[k] / (d1[k] + d2[k]);

      }/*THEN*/
      else {

        UNPROTECT(9);

        error("the probabilities for arc %s -> %s sum to %lf.",
          CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]);

      }/*ELSE*/

    }/*THEN*/

    /* bound the probability of not including an arc away from zero, structure
     * learning otherwise fails when starting from the empty graph and gets
     * stuck very easily in general. */
    if (isTRUE(learning) && (fabs(1 - d1[k] - d2[k]) < MACHINE_TOL)) {

      d1[k] = d1[k] - MACHINE_TOL;
      d2[k] = d2[k] - MACHINE_TOL;

    }/*THEN*/

    /* move to the next arc. */
    k++;

  }/*FOR*/

  /* set up the return value. */
  PROTECT(result = allocVector(VECSXP, 5));
  SET_VECTOR_ELT(result, 0, from);
  SET_VECTOR_ELT(result, 1, to);
  SET_VECTOR_ELT(result, 2, nid);
  SET_VECTOR_ELT(result, 3, dir1);
  SET_VECTOR_ELT(result, 4, dir2);
  setAttrib(result, R_NamesSymbol,
    mkStringVec(5, "from", "to", "aid", "fwd", "bkwd"));
  PROTECT(df = minimal_data_frame(result));

  UNPROTECT(11);

  return df;

}/*CASTELO_COMPLETION*/
Пример #9
0
/* check neighbourhood sets and markov blanets for consistency.. */
SEXP bn_recovery(SEXP bn, SEXP strict, SEXP mb, SEXP filter, SEXP debug) {

int i = 0, j = 0, k = 0, n = 0, counter = 0;
short int *checklist = NULL, err = 0;
int debuglevel = isTRUE(debug), checkmb = isTRUE(mb), *flt = INTEGER(filter);
SEXP temp, temp2, nodes, elnames = NULL, fixed;

  /* get the names of the nodes. */
  nodes = getAttrib(bn, R_NamesSymbol);
  n = length(nodes);

  /* allocate and initialize the checklist. */
  checklist = allocstatus(UPTRI_MATRIX(n));

  if (debuglevel > 0) {

    Rprintf("----------------------------------------------------------------\n");

    if (checkmb)
      Rprintf("* checking consistency of markov blankets.\n");
    else
      Rprintf("* checking consistency of neighbourhood sets.\n");

   }/*THEN*/

  /* scan the structure to determine the number of arcs.  */
  for (i = 0; i < n; i++) {

     if (debuglevel > 0)
       Rprintf("  > checking node %s.\n",  NODE(i));

    /* get the entry for the (neighbours|elements of the markov blanket)
       of the node.*/
    temp = getListElement(bn, (char *)NODE(i));
    if (!checkmb)
      temp = getListElement(temp, "nbr");

    /* check each element of the array and identify which variable it
       corresponds to. */
    for (j = 0; j < length(temp); j++) {

      for (k = 0; k < n; k++) {

        /* increment the right element of checklist. */
        if (!strcmp(NODE(k), (char *)CHAR(STRING_ELT(temp, j))))
          checklist[UPTRI(i + 1, k + 1, n)]++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* if A is a neighbour of B, B is a neighbour of A; therefore each entry in
   * the checklist array must be equal to either zero (if the corresponding
   * nodes are not neighbours) or two (if the corresponding nodes are neighbours).
   * Any other value (typically one) is caused by an incorrect (i.e. asymmetric)
   * neighbourhood structure. The same logic holds for the markov blankets. */
  for (i = 0; i < n; i++)
    for (j = i; j < n; j++) {

      if ((checklist[UPTRI(i + 1, j + 1, n)] != 0) &&
          (checklist[UPTRI(i + 1, j + 1, n)] != 2)) {

        if (debuglevel > 0) {

          if (checkmb)
            Rprintf("@ asymmetry in the markov blankets for %s and %s.\n",
              NODE(i), NODE(j));
          else
            Rprintf("@ asymmetry in the neighbourhood sets for %s and %s.\n",
              NODE(i), NODE(j));

        }/*THEN*/

        err = 1;

      }/*THEN*/

    }/*FOR*/

  /* no need to go on if the (neighbourhood sets|markov blankets) are symmetric;
   * otherwise throw either an error or a warning according to the value of the
   * strict parameter. */
  if (!err) {

    return bn;

  }/*THEN*/
  else if (isTRUE(strict)) {

    if (checkmb)
      error("markov blankets are not symmetric.\n");
    else
      error("neighbourhood sets are not symmetric.\n");

  }/*THEN*/

  /* build a correct structure to return. */
  PROTECT(fixed = allocVector(VECSXP, n));
  setAttrib(fixed, R_NamesSymbol, nodes);

  /* allocate colnames. */
  if (!checkmb)
    PROTECT(elnames = mkStringVec(2, "mb", "nbr"));

  for (i = 0; i < n; i++) {

    if (!checkmb) {

      /* allocate the "mb" and "nbr" elements of the node. */
      PROTECT(temp = allocVector(VECSXP, 2));
      SET_VECTOR_ELT(fixed, i, temp);
      setAttrib(temp, R_NamesSymbol, elnames);

      /* copy the "mb" part from the old structure. */
      temp2 = getListElement(bn, (char *)NODE(i));
      temp2 = getListElement(temp2, "mb");
      SET_VECTOR_ELT(temp, 0, temp2);

    }/*THEN*/

    /* rescan the checklist. */
    for (j = 0; j < n; j++)
      if (checklist[UPTRI(i + 1, j + 1, n)] >= *flt)
        if (i != j)
          counter++;

    /* allocate and fill the "nbr" element. */
    PROTECT(temp2 = allocVector(STRSXP, counter));

    for (j = 0; j < n; j++)
      if (checklist[UPTRI(i + 1, j + 1, n)] == *flt)
        if (i != j)
          SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j));

    if (checkmb) {

      SET_VECTOR_ELT(fixed, i, temp2);
      UNPROTECT(1);

    }/*THEN*/
    else {

      SET_VECTOR_ELT(temp, 1, temp2);
      UNPROTECT(2);

    }/*ELSE*/

  }/*FOR*/

  if (checkmb)
    UNPROTECT(1);
  else
    UNPROTECT(2);

  return fixed;

}/*BN_RECOVERY*/