Ejemplo n.º 1
0
/* generate a connected graph with uniform probability, subject to some
 * constraints on the degree of the nodes. */
SEXP ide_cozman_graph(SEXP nodes, SEXP num, SEXP burn_in, SEXP max_in_degree,
    SEXP max_out_degree, SEXP max_degree, SEXP connected, SEXP debug) {

SEXP graphlist;

  switch(LENGTH(nodes)) {

    case 1:

      /* there is only one graph with 1 node, and it's empty. */
      graphlist = empty_graph(nodes, num);
      break;

    case 2:
      /* Ide-Cozman has no mixing with only 2 nodes, work around with i.i.d.
       * sampling.*/
      if (isTRUE(connected)) {

        graphlist = ic_2nodes(nodes, num, burn_in, max_in_degree,
                      max_out_degree, max_degree, connected, debug);
        break;

      }/*THEN*/

    default:

      graphlist = c_ide_cozman(nodes, num, burn_in, max_in_degree,
                    max_out_degree, max_degree, connected, debug);

  }/*SWITCH*/

  return graphlist;

}/*IDE_COZMAN_GRAPH*/
Ejemplo n.º 2
0
/* compute the cached values fro all nodes. */
SEXP cache_structure(SEXP nodes, SEXP amat, SEXP debug) {

int i = 0, debuglevel = LOGICAL(debug)[0], length_nodes = LENGTH(nodes);
int *status = NULL, *a = INTEGER(amat);

  SEXP bn, temp;

  /* allocate the list and set its attributes.*/
  PROTECT(bn = allocVector(VECSXP, length_nodes));
  setAttrib(bn, R_NamesSymbol, nodes);

  /* allocate and intialize the status vector. */
  status = alloc1dcont(length_nodes);

  if (isTRUE(debug))
    Rprintf("* (re)building cached information about network structure.\n");

  /* populate the list with nodes' data. */
  for (i = 0; i < length_nodes; i++) {

    /* (re)initialize the status vector. */
    memset(status, '\0', sizeof(int) * length_nodes);

    temp = cache_node_structure(i, nodes, a, length_nodes, status, debuglevel);

    /* save the returned list. */
    SET_VECTOR_ELT(bn, i, temp);

  }/*FOR*/

  UNPROTECT(1);

  return bn;

}/*CACHE_STRUCTURE*/
Ejemplo n.º 3
0
static SEXP build_return_array(SEXP nodes, short int *status, int nrows,
    int check_status, SEXP return_nodes) {

int i = 0, j = 0;
SEXP res;

  if (check_status < 3) {

    if (isTRUE(return_nodes)) {

      PROTECT(res = allocVector(STRSXP, 0));

    }/*THEN*/
    else {

      PROTECT(res = allocVector(LGLSXP, 1));
      LOGICAL(res)[0] = TRUE;

    }/*ELSE*/

  }/*THEN*/
  else {

    if (isTRUE(return_nodes)) {

      PROTECT(res = allocVector(STRSXP, check_status));

      for (i = 0; i < nrows; i++)
        if (status[i] == BAD)
          SET_STRING_ELT(res, j++, STRING_ELT(nodes, i));

    }/*THEN*/
    else {

      PROTECT(res = allocVector(LGLSXP, 1));
      LOGICAL(res)[0] = FALSE;

    }/*ELSE*/

  }/*ELSE*/

  UNPROTECT(1);

  return res;

}/*BUILD_RETURN_ARRAY*/
Ejemplo n.º 4
0
/* get the number of parameters of the whole network (mixed case, also handles
 * discrete and Gaussian networks). */
SEXP nparams_cgnet(SEXP graph, SEXP data, SEXP debug) {

int i = 0, j = 0, nnodes = 0, debuglevel = isTRUE(debug);
int *nlevels = NULL, *index = NULL, ngp = 0;
double nconfig = 0, node_params = 0, all_params = 0;
SEXP nodes = R_NilValue, node_data, parents, temp;

  /* get nodes' number and data. */
  node_data = getListElement(graph, "nodes");
  nnodes = length(node_data);
  nodes = getAttrib(node_data, R_NamesSymbol);
  /* cache the number of levels of each variables (zero = continuous). */
  nlevels = Calloc1D(nnodes, sizeof(int));
  for (i = 0; i < nnodes; i++) {

    temp = VECTOR_ELT(data, i);
    if (TYPEOF(temp) == INTSXP)
      nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  for (i = 0; i < nnodes; i++) {

    /* extract the parents of the node and match them. */
    parents = getListElement(VECTOR_ELT(node_data, i), "parents");
    PROTECT(temp = match(nodes, parents, 0));
    index = INTEGER(temp);

    /* compute the number of regressors and of configurations. */
    for (j = 0, ngp = 0, nconfig = 1; j < length(parents); j++) {

      if (nlevels[index[j] - 1] == 0)
        ngp++;
      else
        nconfig *= nlevels[index[j] - 1];

    }/*FOR*/
    /* compute the overall number of parameters as regressors plus intercept
     * times configurations. */
    node_params = nconfig * (nlevels[i] == 0 ? ngp + 1 : nlevels[i] - 1);

    if (debuglevel > 0)
      Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params);

    /* update the return value. */
    all_params += node_params;

    UNPROTECT(1);

  }/*FOR*/

  Free1D(nlevels);

  return ScalarReal(all_params);

}/*NPARAMS_CGNET*/
Ejemplo n.º 5
0
/* unconditional mutual information, to be used for the asymptotic test. */
SEXP mi(SEXP x, SEXP y, SEXP gsquare, SEXP adjusted) {

int llx = NLEVELS(x), lly = NLEVELS(y), num = length(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double *res = NULL;
SEXP result;

  PROTECT(result = allocVector(REALSXP, 2));
  res = REAL(result);
  if (isTRUE(adjusted))
    res[0] = c_chisqtest(xx, llx, yy, lly, num, res + 1, MI_ADF);
  else
    res[0] = c_chisqtest(xx, llx, yy, lly, num, res + 1, MI);

  /* rescale to match the G^2 test. */
  if (isTRUE(gsquare))
    res[0] *= 2 * num;

  UNPROTECT(1);

  return result;

}/*MI*/
Ejemplo n.º 6
0
/* R frontend: compute the score component for each target node. */
SEXP per_node_score(SEXP network, SEXP data, SEXP score, SEXP targets,
    SEXP extra_args, SEXP debug) {

SEXP result;

  /* allocate the return value. */
  PROTECT(result = allocVector(REALSXP, length(targets)));
  /* compute the score componenets. */
  c_per_node_score(network, data, score, targets, extra_args, isTRUE(debug),
    REAL(result));
  /* set labels on the computed score components. */
  setAttrib(result, R_NamesSymbol, targets);

  UNPROTECT(1);

  return result;

}/*PER_NODE_SCORE*/
Ejemplo n.º 7
0
/* compute the cached values for a single node (R-friendly). */
SEXP cache_partial_structure(SEXP nodes, SEXP target, SEXP amat, SEXP debug) {

int i = 0, debuglevel = LOGICAL(debug)[0], length_nodes = LENGTH(nodes);
char *t = (char *)CHAR(STRING_ELT(target, 0));
int *status = NULL, *a = INTEGER(amat);

  if (isTRUE(debug))
    Rprintf("* (re)building cached information about node %s.\n", t);

  /* allocate and initialize the status vector. */
  status = alloc1dcont(length_nodes);

  /* iterate fo find the node position in the array.  */
  for (i = 0; i < length_nodes; i++)
    if (!strcmp(t, CHAR(STRING_ELT(nodes, i))))
      break;

  /* return the corresponding part of the bn structure. */
  return cache_node_structure(i, nodes, a, length_nodes, status, debuglevel);

}/*CACHE_PARTIAL_STRUCTURE*/
Ejemplo n.º 8
0
/* unconditional mutual information, to be used for the asymptotic test. */
SEXP mi(SEXP x, SEXP y, SEXP gsquare) {

int llx = NLEVELS(x), lly = NLEVELS(y), num = LENGTH(x);
int *xx = INTEGER(x), *yy = INTEGER(y);
double *res = NULL;
SEXP result;

  PROTECT(result = allocVector(REALSXP, 2));
  res = REAL(result);
  res[0] = c_mi(xx, &llx, yy, &lly, &num);
  res[1] = (double)(llx - 1) * (double)(lly - 1);

  /* rescale to match the G^2 test. */
  if (isTRUE(gsquare))
    res[0] *= 2 * num;

  UNPROTECT(1);

  return result;

}/*MI*/
Ejemplo n.º 9
0
/* conditional mutual information, to be used for the asymptotic test. */
SEXP cmi(SEXP x, SEXP y, SEXP z, SEXP gsquare) {

int llx = NLEVELS(x), lly = NLEVELS(y), llz = NLEVELS(z);
int num = LENGTH(x);
int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z);
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 2));
  res = REAL(result);
  res[0] = c_cmi(xx, &llx, yy, &lly, zz, &llz, &num);
  res[1] = (double)(llx - 1) * (double)(lly - 1) * (double)llz;

  /* rescale to match the G^2 test. */
  if (isTRUE(gsquare))
    res[0] *= 2 * num;

  UNPROTECT(1);

  return result;

}/*CMI*/
Ejemplo n.º 10
0
Archivo: bind.c Proyecto: aogbechie/DBN
/* faster rbind() implementation for arc sets. */
SEXP arcs_rbind (SEXP matrix1, SEXP matrix2, SEXP reverse2) {

int i = 0, j = 0, m1 = length(matrix1)/2, m2 = length(matrix2)/2;
SEXP res;

  /* allocate the return value. */
  PROTECT(res = allocMatrix(STRSXP, m1 + m2, 2));
  /* allocate and initialize the column names. */
  finalize_arcs(res);

  /* copy the elements of the first matrix. */
  for (i  = 0; i < m1; i++)
    for (j = 0; j < 2; j++)
      SET_STRING_ELT(res, CMC(i, j, m1 + m2), STRING_ELT(matrix1, CMC(i, j, m1)));

  /* copy the elements of the second matrix, reversing the order of the
   * columns as needed. */
  if (isTRUE(reverse2)) {

    for (i = 0; i < m2; i++)
      for(j = 0; j < 2; j++)
        SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, 1 - j, m2)));

  }/*THEN*/
  else {

    for (i = 0; i < m2; i++)
      for(j = 0; j < 2; j++)
        SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, j, m2)));

  }/*ELSE*/

  UNPROTECT(1);

  return res;

}/*ARCS_RBIND*/
Ejemplo n.º 11
0
/* unconditional independence tests. */
SEXP utest(SEXP x, SEXP y, SEXP data, SEXP test, SEXP B, SEXP alpha,
    SEXP learning) {

int ntests = length(x), nobs = 0;
double *pvalue = NULL, statistic = 0, df = NA_REAL;
const char *t = CHAR(STRING_ELT(test, 0));
test_e test_type = test_label(t);
SEXP xx, yy, result;

  /* allocate the return value, which has the same length as x. */
  PROTECT(result = allocVector(REALSXP, ntests));
  setAttrib(result, R_NamesSymbol, x);
  pvalue = REAL(result);
  /* set all elements to zero. */
  memset(pvalue, '\0', ntests * sizeof(double));

  /* extract the variables from the data. */
  PROTECT(xx = c_dataframe_column(data, x, FALSE, FALSE));
  PROTECT(yy = c_dataframe_column(data, y, TRUE, FALSE));
  nobs = length(yy);

  if (IS_DISCRETE_ASYMPTOTIC_TEST(test_type)) {

    /* parametric tests for discrete variables. */
    statistic = ut_discrete(xx, yy, nobs, ntests, pvalue, &df, test_type);

  }/*THEN*/
  else if ((test_type == COR) || (test_type == ZF) || (test_type == MI_G) ||
           (test_type == MI_G_SH)) {

    /* parametric tests for Gaussian variables. */
    statistic = ut_gaustests(xx, yy, nobs, ntests, pvalue, &df, test_type);

  }/*THEN*/
  else if (test_type == MI_CG) {

    /* conditional linear Gaussian mutual information test. */
    statistic = ut_micg(xx, yy, nobs, ntests, pvalue, &df);

  }/*THEN*/
  else if (IS_DISCRETE_PERMUTATION_TEST(test_type)) {

    statistic = ut_dperm(xx, yy, nobs, ntests, pvalue, &df, test_type, INT(B),
                  IS_SMC(test_type) ? NUM(alpha) : 1);

  }/*THEN*/
  else if (IS_CONTINUOUS_PERMUTATION_TEST(test_type)) {

    statistic = ut_gperm(xx, yy, nobs, ntests, pvalue, test_type, INT(B),
                  IS_SMC(test_type) ? NUM(alpha) : 1);

  }/*THEN*/

  UNPROTECT(3);

  /* catch-all for unknown tests (after deallocating memory.) */
  if (test_type == ENOTEST)
    error("unknown test statistic '%s'.", t);

  /* increase the test counter. */
  test_counter += ntests;

  if (isTRUE(learning))
    return result;
  else
    return c_create_htest(statistic, test, pvalue[ntests - 1], df, B);

}/*UTEST*/
Ejemplo n.º 12
0
/* check neighbourhood sets and markov blankets for consistency.. */
SEXP bn_recovery(SEXP bn, SEXP strict, SEXP mb, SEXP debug, SEXP filter) {

int i = 0, j = 0, k = 0, n = 0, counter = 0;
short int *checklist = NULL, err = 0;
int *debuglevel = NULL, *checkmb = NULL, *nbrfilter = NULL;
SEXP temp, temp2, nodes, elnames = NULL, fixed;

  /* get the names of the nodes. */
  nodes = getAttrib(bn, R_NamesSymbol);
  n = LENGTH(nodes);

  /* allocate and initialize the checklist. */
  checklist = allocstatus(UPTRI_MATRIX(n));

  /* dereference the debug, mb and filter parameters. */
  debuglevel = LOGICAL(debug);
  checkmb = LOGICAL(mb);
  nbrfilter = INTEGER(filter);

  if (*debuglevel > 0) {

    Rprintf("----------------------------------------------------------------\n");

    if (*checkmb)
      Rprintf("* checking consistency of markov blankets.\n");
    else
      Rprintf("* checking consistency of neighbourhood sets.\n");

   }/*THEN*/

  /* scan the structure to determine the number of arcs.  */
  for (i = 0; i < n; i++) {

     if (*debuglevel > 0)
       Rprintf("  > checking node %s.\n",  NODE(i));

    /* get the entry for the (neighbours|elements of the markov blanket)
       of the node.*/
    temp = getListElement(bn, (char *)NODE(i));
    if (!(*checkmb))
      temp = getListElement(temp, "nbr");

    /* check each element of the array and identify which variable it
       corresponds to. */
    for (j = 0; j < LENGTH(temp); j++) {

      for (k = 0; k < n; k++) {

        /* increment the right element of checklist. */
        if (!strcmp(NODE(k), (char *)CHAR(STRING_ELT(temp, j))))
          checklist[UPTRI(i + 1, k + 1, n)]++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* if A is a neighbour of B, B is a neighbour of A; therefore each entry in
   * the checklist array must be equal to either zero (if the corresponding
   * nodes are not neighbours) or two (if the corresponding nodes are neighbours).
   * Any other value (typically one) is caused by an incorrect (i.e. asymmetric)
   * neighbourhood structure. The same logic holds for the markov blankets. */
  for (i = 0; i < n; i++)
    for (j = i; j < n; j++) {

      if ((checklist[UPTRI(i + 1, j + 1, n)] != 0) &&
          (checklist[UPTRI(i + 1, j + 1, n)] != 2)) {

        if (*debuglevel > 0) {

          if (*checkmb)
            Rprintf("@ asymmetry in the markov blankets for %s and %s.\n",
              NODE(i), NODE(j));
          else
            Rprintf("@ asymmetry in the neighbourhood sets for %s and %s.\n",
              NODE(i), NODE(j));

        }/*THEN*/

        err = 1;

      }/*THEN*/

    }/*FOR*/

  /* no need to go on if the (neighbourhood sets|markov blankets) are symmetric;
   * otherwise throw either an error or a warning according to the value of the
   * strict parameter. */
  if (!err) {

    return bn;

  }/*THEN*/
  else if (isTRUE(strict)) {

    if (*checkmb)
      error("markov blankets are not symmetric.\n");
    else
      error("neighbourhood sets are not symmetric.\n");

  }/*THEN*/
  else {

    if (*checkmb)
      warning("markov blankets are not symmetric.\n");
    else
      warning("neighbourhood sets are not symmetric.\n");

  }/*ELSE*/

  /* build a correct structure to return. */
  PROTECT(fixed = allocVector(VECSXP, n));
  setAttrib(fixed, R_NamesSymbol, nodes);

  if (!(*checkmb)) {

    /* allocate colnames. */
    PROTECT(elnames = allocVector(STRSXP, 2));
    SET_STRING_ELT(elnames, 0, mkChar("mb"));
    SET_STRING_ELT(elnames, 1, mkChar("nbr"));

  }/*THEN*/

  for (i = 0; i < n; i++) {

    if (!(*checkmb)) {

      /* allocate the "mb" and "nbr" elements of the node. */
      PROTECT(temp = allocVector(VECSXP, 2));
      SET_VECTOR_ELT(fixed, i, temp);
      setAttrib(temp, R_NamesSymbol, elnames);

      /* copy the "mb" part from the old structure. */
      temp2 = getListElement(bn, (char *)NODE(i));
      temp2 = getListElement(temp2, "mb");
      SET_VECTOR_ELT(temp, 0, temp2);

    }/*THEN*/

    /* fix the neighbourhoods with an AND or an OR filter.
     * AND means that both neighbours have to see each other
     * to be neighbours, OR means that one neighbour at least
     * as to see the other one for both to be neighbours. */
    switch(*nbrfilter) {

      case AND_FILTER:

        /* rescan the checklist. */
        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] == 2)
            if (i != j)
              counter++;

        /* allocate and fill the "nbr" element. */
        PROTECT(temp2 = allocVector(STRSXP, counter));

        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] == 2)
            if (i != j)
              SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j));

        break;

      case OR_FILTER:

        /* rescan the checklist. */
        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] >= 1)
            if (i != j)
              counter++;

        /* allocate and fill the "nbr" element. */
        PROTECT(temp2 = allocVector(STRSXP, counter));

        for (j = 0; j < n; j++)
          if (checklist[UPTRI(i + 1, j + 1, n)] >= 1)
            if (i != j)
              SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j));

        break;
    }

    if (*checkmb) {

      SET_VECTOR_ELT(fixed, i, temp2);
      UNPROTECT(1);

    }/*THEN*/
    else {

      SET_VECTOR_ELT(temp, 1, temp2);
      UNPROTECT(2);

    }/*ELSE*/

  }/*FOR*/

  if (*checkmb)
    UNPROTECT(1);
  else
    UNPROTECT(2);

return fixed;

}/*BN_RECOVERY*/
Ejemplo n.º 13
0
/* set the directions of the arcs in a tree given the root node. */
SEXP tree_directions(SEXP arcs, SEXP nodes, SEXP root, SEXP debug) {

int i = 0, j = 0, d = 0, traversed = 1;
int narcs = length(arcs)/2, nnodes = length(nodes);
int *a = NULL, *depth = 0, debuglevel = isTRUE(debug);
SEXP try, try2, result;

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  a = INTEGER(try);

  /* match the root node. */
  PROTECT(try2 = match(nodes, root, 0));

  /* allocate and initialize the statust vector. */
  depth = Calloc1D(nnodes, sizeof(int));
  depth[INT(try2) - 1] = 1;

  if (debuglevel > 0)
    Rprintf("> root node (depth 1) is %s.\n", NODE(INT(try2) - 1));

  for (d = 1; d <= nnodes; d++) {

    if (debuglevel > 0)
      Rprintf("> considering nodes at depth %d.\n", d + 1);

    for (i = 0; i < narcs; i++) {

      for (j = 0; j < nnodes; j++) {

        /* disregard nodes at the wrong depth. */
        if (depth[j] != d)
          continue;

        if ((a[i + narcs] == (j + 1)) && (depth[a[i] - 1] == 0)) {

          if (debuglevel > 0)
            Rprintf("  * found node %s.\n", NODE(a[i] - 1));

          /* save the depth at which the node was found. */
          depth[a[i] - 1] = d + 1;

          /* update the counter of the traversed nodes. */
          traversed++;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

    /* check whether all nodes have been traversed. */
    if (traversed == nnodes)
      break;

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs/2, 2));

  for (i = 0, j = 0; i < narcs; i++) {

    if (depth[a[i] - 1] < depth[a[i + narcs] - 1]) {

      SET_STRING_ELT(result, j, STRING_ELT(arcs, i));
      SET_STRING_ELT(result, j + narcs/2, STRING_ELT(arcs, i + narcs));
      j++;

    }/*THEN*/

  }/*FOR*/

  UNPROTECT(3);

  Free1D(depth);

  return result;

}/*TREE_DIRECTIONS*/
Ejemplo n.º 14
0
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncol = length(data);
int num = length(VECTOR_ELT(data, 0)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, clevels = 0, *est = INTEGER(estimator), *depth = NULL;
int *wl = NULL, *bl = NULL, *poset = NULL, debuglevel = isTRUE(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL, *means = NULL, *sse = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = NLEVELS(conditional);

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(double));
  include = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(short int));

  /* compute the pairwise mutual information coefficients. */
  if (debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncol, nlevels, &num, cond, &clevels, means, sse, est);

  LIST_MUTUAL_INFORMATION_COEFS();

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (length(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = length(wlist);

    for (i = 0; i < nwl; i++) {

      if (debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (length(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = length(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(int));
  for (i = 0; i < UPTRI3_MATRIX(ncol); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncol));

  depth = Calloc1D(ncol, sizeof(int));

  for (i = UPTRI3_MATRIX(ncol) - 1; i >= 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncol, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncol - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, depth, debug_coord[0], debug_coord[1], ncol,
          nodes, FALSE)) {

      if (debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (length(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncol - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncol - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncol - 1));

  Free1D(depth);
  Free1D(mim);
  Free1D(include);
  Free1D(poset);
  Free1D(columns);
  if (nlevels)
    Free1D(nlevels);
  if (means)
    Free1D(means);
  if (sse)
    Free1D(sse);

  return arcs;

}/*CHOW_LIU*/
Ejemplo n.º 15
0
/* ARACNE structure learning algorithm. */
SEXP aracne(SEXP data, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP debug) {

int i = 0, j = 0, k = 0, coord = 0, ncol = length(data);
int num = length(VECTOR_ELT(data, i)), narcs = ncol * (ncol - 1) / 2;
int *nlevels = NULL, *est = INTEGER(estimator), *wl = NULL, *bl = NULL;
int debuglevel = isTRUE(debug);
void **columns = NULL;
short int *exclude = NULL;
double *mim = NULL, *means = NULL, *sse = NULL;
SEXP arcs, nodes, wlist, blist;

  PROTECT(nodes = getAttrib(data, R_NamesSymbol));

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* allocate the mutual information matrix and the status vector. */
  mim = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(double));
  exclude = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(short int));

  /* compute the pairwise mutual information coefficients. */
  if (debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncol, nlevels, &num, NULL, NULL, means, sse, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* compare all the triplets. */
  for (i = 0; i < ncol; i++) {

    for (j = i + 1; j < ncol; j++) {

      for (k = 0; k < ncol; k++) {

        if ((k == i) || (k == j))
          continue;

        /* cache the UPTRI3 coordinates of the arc. */
        coord = UPTRI3(i + 1, j + 1, ncol);

        /* if MI(X, Y) < min(MI(X, Z), MI(Z, Y)) drop arc X - Y. */
        if ((mim[coord] < mim[UPTRI3(i + 1, k + 1, ncol)]) &&
            (mim[coord] < mim[UPTRI3(j + 1, k + 1, ncol)])) {

          if (debuglevel > 0) {

            Rprintf("* dropping arc %s - %s because of %s, %lf < min(%lf, %lf)\n",
              NODE(i), NODE(j), NODE(k), mim[UPTRI3(i + 1, j + 1, ncol)],
              mim[UPTRI3(i + 1, k + 1, ncol)], mim[UPTRI3(j + 1, k + 1, ncol)]);

          }/*THEN*/

          /* update the status vector. */
          exclude[coord] = 1;
          /* decrement the number of arcs. */
          narcs--;

          break;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* add back whitelisted arcs. */
  if ((!isNull(whitelist)) && (length(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);

    for (i = 0; i < length(wlist); i++) {

      if (debuglevel > 0) {

        Rprintf("* adding back whitelisted arcs.\n");

        if (exclude[wl[i]] == 1) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + length(wlist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + length(wlist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[wl[i]] == 1)
        narcs++;
      /* include the arc in the graph. */
      exclude[wl[i]] = 0;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* remove blacklisted arcs. */
  if ((!isNull(blacklist)) && (length(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);

    for (i = 0; i < length(blist); i++) {

      if (debuglevel > 0) {

        Rprintf("* removing blacklisted arcs.\n");

        if (exclude[bl[i]] == 0) {

          Rprintf("  > arc %s - %s has been dropped from the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + length(blist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was not present in the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + length(blist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[bl[i]] == 0)
        narcs--;
      /* remove the arc from the graph. */
      exclude[bl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  CONVERT_TO_ARC_SET(exclude, 1, 2 * narcs);

  Free1D(mim);
  Free1D(exclude);
  Free1D(columns);
  if (nlevels)
    Free1D(nlevels);
  if (means)
    Free1D(means);
  if (sse)
    Free1D(sse);

  UNPROTECT(1);

  return arcs;

}/*ARACNE*/
Ejemplo n.º 16
0
/* compute the number of parameters of a fitted model. */
SEXP nparams_fitted(SEXP bn, SEXP effective, SEXP debug) {

int i = 0, j = 0, k = 0, node_params = 0, nnodes = length(bn), *pd = NULL;
int res = 0, debuglevel = isTRUE(debug), eff = isTRUE(effective);
double *ps = NULL, counter = 0;
SEXP nodes = R_NilValue, node_data, param_set, param_dims;

  if (debuglevel > 0)
    nodes = getAttrib(bn, R_NamesSymbol);

  for (i = 0; i < nnodes; i++) {

    /* get the node's data. */
    node_data = VECTOR_ELT(bn, i);
    /* get its probability distribution (if discrete). */
    param_set = getListElement(node_data, "prob");

    if (!isNull(param_set)) {

      /* get the dimensions of the conditional probability table. */
      param_dims = getAttrib(param_set, R_DimSymbol);
      pd = INTEGER(param_dims);
      ps = REAL(param_set);

      if (eff) {

        /* count the number of non-zero free parameters. */
        for (node_params = 0, k = 0; k < length(param_set) / pd[0]; k++) {

          /* check the elements of each conditional probability distribution. */
          for (counter = 0, j = 0; j < pd[0]; j++)
            counter += !ISNAN(ps[CMC(j, k, pd[0])]) && (ps[CMC(j, k, pd[0])] > 0);
          /* subtract the column total to get the free parameters. */
          if (counter > 0)
            counter--;

          node_params += counter;

        }/*FOR*/

      }/*THEN*/
      else {

        /* compute the number of parameters. */
        for (node_params = 1, j = 1; j < length(param_dims); j++)
          node_params *= pd[j];

        node_params *= pd[0] - 1;

      }/*ELSE*/

    }/*THEN*/
    else {

      /* get the vector (or matrix) of regression coefficients. */
      param_set = getListElement(node_data, "coefficients");
      ps = REAL(param_set);

      /* this is a continuous node, so it's a lot easier. */
      if (eff) {

        /* count the number of non-zero regression coefficients. */
        for (node_params = 0, j = 0; j < length(param_set); j++)
          node_params += (ps[j] != 0);

      }/*THEN*/
      else {

        /* compute the number of parameters. */
        node_params = length(param_set);

      }/*ELSE*/

    }/*ELSE*/

    if (debuglevel > 0)
      Rprintf("* node %s has %d parameter(s).\n", NODE(i), node_params);

    res += node_params;

  }/*FOR*/

  return ScalarInteger(res);

}/*NPARAMS_FITTED*/
Ejemplo n.º 17
0
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents,
    SEXP maxp, SEXP nodes, SEXP convert) {

int i = 0, j = 0, narcs = 0, dims = length(nodes);
int *a = NULL, *coords = NULL;
double *mp = REAL(maxp), *np = NULL;
short int referenced = 0;
SEXP try, result = R_NilValue, result2;

  /* transform the arc set into an adjacency matrix, if it's not one already. */
  if (isInteger(arcs)) {

    if ((referenced = MAYBE_REFERENCED(arcs)))
      PROTECT(result = duplicate(arcs));

  }/*THEN*/
  else {

    PROTECT(result = arcs2amat(arcs, nodes));

  }/*ELSE*/

  /* dereference the adjacency matrix once and for all. */
  a = INTEGER(result);

  /* compute the number the parents of each node, unless provided. */
  if (nparents == R_NilValue) {

    np = Calloc1D(dims, sizeof(double));
    for (i = 0; i < dims; i++)
      for (j = 0; j < dims; j++)
        np[j] = a[CMC(i, j, dims)];

  }/*THEN*/
  else {

    np = REAL(nparents);

  }/*ELSE*/

  /* flip all the nondiagonal cells. */
  for (j = 0; j < dims; j++) {

    for (i = 0; i < dims; i++) {

      /* diagonal elements are always equal to zero, skip them. */
      if (i == j)
        continue;

      a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)];

    }/*FOR*/

  }/*FOR*/

  /* if an arc is present in the graph in one direction, you cannot add it in
   * the other direction (it would be a reversal); flip both in the adjacency
   * matrix. */
  for (j = 0; j < dims; j++)
    for (i = j + 1; i < dims; i++)
      a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)];

  /* if a node has already reached its maximum number parents, do not add
   * more arcs pointing to that node. */
  for (j = 0; j < dims; j++)
    if (np[j] >= *mp)
      memset(a + j * dims, '\0', dims * sizeof(int));

#define FLIP_FROM_LIST(list, value) \
  if (!isNull(list)) { \
    if (!isInteger(list)) { \
      PROTECT(try = match(nodes, list, 0)); \
      coords = INTEGER(try); \
      narcs = length(try)/2; \
      for (i = 0; i < narcs; i++)  \
        a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \
      UNPROTECT(1); \
    }/*THEN*/ \
    else { \
      coords = INTEGER(list); \
      for (i = 0; i < dims * dims; i ++) \
        if (coords[i] == 1) \
          a[i] = value; \
    }/*ELSE*/ \
  }/*THEN*/

  /* now the blacklist gets involved. */
  FLIP_FROM_LIST(blacklist, 0);
  /* and, last but not least, the whitelist gets involved. */
  FLIP_FROM_LIST(whitelist, 1);

  if (nparents == R_NilValue)
    Free1D(np);

  /* return either the adjacency matrix or the arc set. */
  if (isTRUE(convert)) {

    PROTECT(result2 = amat2arcs(result, nodes));

    if (referenced || !isInteger(arcs))
      UNPROTECT(2);
    else
      UNPROTECT(1);
    return result2;

  }/*THEN*/
  else {

    if (referenced || !isInteger(arcs))
      UNPROTECT(1);
    return result;

  }/*ELSE*/

}/*HC_TO_BE_ADDED*/
Ejemplo n.º 18
0
/* return the skeleton of a DAG/PDAG. */
SEXP dag2ug(SEXP bn, SEXP moral, SEXP debug) {

int i = 0, j = 0, k = 0, nnodes = 0, narcs = 0, row = 0;
int debuglevel = isTRUE(debug), *moralize = LOGICAL(moral);
int *nparents = NULL, *nnbr = NULL;
SEXP node_data, current, nodes, result, temp;

  /* get the nodes' data. */
  node_data = getListElement(bn, "nodes");
  nnodes = length(node_data);
  PROTECT(nodes = getAttrib(node_data, R_NamesSymbol));

  /* allocate and initialize parents' and neighbours' counters. */
  nnbr = Calloc1D(nnodes, sizeof(int));
  if (*moralize > 0)
    nparents = Calloc1D(nnodes, sizeof(int));

  /* first pass: count neighbours, parents and resulting arcs. */
  for (i = 0; i < nnodes; i++) {

    /* get the number of neighbours.  */
    current = VECTOR_ELT(node_data, i);
    nnbr[i] = length(getListElement(current, "nbr"));

    /* update the number of arcs to be returned. */
    if (*moralize > 0) {

      /* get also the number of parents, needed to account for the arcs added
       * for their moralization. */
      nparents[i] = length(getListElement(current, "parents"));
      narcs += nnbr[i] + nparents[i] * (nparents[i] - 1);

    }/*THEN*/
    else {

      narcs += nnbr[i];

    }/*ELSE*/

    if (debuglevel > 0)  {

      if (*moralize > 0) {

        Rprintf("* scanning node %s, found %d neighbours and %d parents.\n",
          NODE(i), nnbr[i], nparents[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n",
          nnbr[i] + nparents[i] * (nparents[i] - 1), narcs);

      }/*THEN*/
      else {

        Rprintf("* scanning node %s, found %d neighbours.\n", NODE(i), nnbr[i]);
        Rprintf("  > adding %d arcs, for a total of %d.\n", nnbr[i], narcs);

      }/*ELSE*/

    }/*THEN*/

  }/*FOR*/

  /* allocate the return value. */
  PROTECT(result = allocMatrix(STRSXP, narcs, 2));
  /* allocate and set the column names. */
  setDimNames(result, R_NilValue, mkStringVec(2, "from", "to"));

  /* second pass: fill the return value. */
  for (i = 0; i < nnodes; i++) {

    /* get to the current node. */
    current = VECTOR_ELT(node_data, i);
    /* get the neighbours. */
    temp = getListElement(current, "nbr");

    for (j = 0; j < nnbr[i]; j++) {

      SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(nodes, i));
      SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
      row++;

    }/*FOR*/

    /* if we are not creating a moral graph we are done with this node. */
    if (*moralize == 0)
      continue;

    /* get the parents. */
    temp = getListElement(current, "parents");

    for (j = 0; j < nparents[i]; j++) {

      for (k = j+1; k < nparents[i]; k++) {

        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, k));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j));
        row++;
        SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, j));
        SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, k));
        row++;

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  Free1D(nnbr);

  if (*moralize > 0) {

    /* be really sure not to return duplicate arcs in moral graphs when
     * shielded parents are present (the "shielding" nodes are counted
     * twice). */
    result = c_unique_arcs(result, nodes, FALSE);

    Free1D(nparents);

  }/*THEN*/

  UNPROTECT(2);

  return result;

}/*DAG2UG*/
Ejemplo n.º 19
0
/* construct a consistent DAG extension of a CPDAG. */
SEXP pdag_extension(SEXP arcs, SEXP nodes, SEXP debug) {

int i = 0, j = 0, k = 0, t = 0, nnodes = length(nodes);
int changed = 0, left = nnodes;
int *a = NULL, *nbr = NULL, debuglevel = isTRUE(debug);
short int *matched = NULL;
SEXP amat, result;

  /* build and dereference the adjacency matrix. */
  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* allocate and initialize the neighbours and matched vectors. */
  nbr = Calloc1D(nnodes, sizeof(int));
  matched = Calloc1D(nnodes, sizeof(short int));

  for (t = 0; t < nnodes; t++) {

    if (debuglevel > 0) {

      Rprintf("----------------------------------------------------------------\n");
      Rprintf("> performing pass %d.\n", t + 1);
      Rprintf("> candidate nodes: ");
        for (j = 0; j < nnodes; j++)
          if (matched[j] == 0)
            Rprintf("%s ", NODE(j));
      Rprintf("\n");

    }/*THEN*/

    for (i = 0; i < nnodes; i++) {

      /* if the node is already ok, skip it. */
      if (matched[i] != 0)
        continue;

      /* check whether the node is a sink (that is, whether is does not have
       * any child). */
      is_a_sink(a, i, &k, nnodes, nbr, matched);

      /* if the node is not a sink move on. */
      if (k == -1) {

        if (debuglevel > 0)
          Rprintf("  * node %s is not a sink.\n", NODE(i));

        continue;

      }/*THEN*/
      else {

        if (debuglevel > 0)
          Rprintf("  * node %s is a sink.\n", NODE(i));

      }/*ELSE*/

      if (!all_adjacent(a, i, k, nnodes, nbr)) {

        if (debuglevel > 0)
          Rprintf("  * not all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i));

        continue;

      }/*THEN*/
      else {

        if (debuglevel > 0) {

          if (k == 0)
            Rprintf("  * no node is linked to %s by an undirected arc.\n", NODE(i));
          else
            Rprintf("  * all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i));

        }/*THEN*/

      }/*ELSE*/

      /* the current node meets all the conditions, direct all the arcs towards it. */
      if (k == 0) {

        if (debuglevel > 0)
          Rprintf("  @ no undirected arc to direct towards %s.\n", NODE(i));

      }/*THEN*/
      else {

        for (j = 0; j < k; j++)
          a[CMC(i, nbr[j], nnodes)] = 0;

        if (debuglevel > 0)
          Rprintf("  @ directing all incident undirected arcs towards %s.\n", NODE(i));

      }/*ELSE*/

      /* set the changed flag. */
      changed = 1;

      /* exclude the node from later iterations. */
      matched[i] = 1;
      left--;

    }/*FOR*/

    /* if nothing changed in the last iteration or there are no more candidate
     * nodes, there is nothing else to do. */
    if ((changed == 0) || (left == 0))
      break;
    else
      changed = 0;

  }/*FOR*/

  /* build the new arc set from the adjacency matrix. */
  PROTECT(result = amat2arcs(amat, nodes));

  Free1D(nbr);
  Free1D(matched);
  UNPROTECT(2);

  return result;

}/*PDAG_EXTENSION*/
Ejemplo n.º 20
0
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, 
    SEXP directed, SEXP debug) {

int i = 0, j = 0, z = 0;
int nrows = LENGTH(nodes);
int check_status = nrows, check_status_old = nrows;
int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL;
int *debuglevel = NULL;
short int *status = NULL;
SEXP amat;

  /* dereference the debug parameter. */
  debuglevel = LOGICAL(debug);

  /* build the adjacency matrix from the arc set.  */
  if (*debuglevel > 0)
    Rprintf("* building the adjacency matrix.\n");

  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* should we consider only directed arcs? */
  if (isTRUE(directed)) {

    /* removing undirected arcs, so that only cycles made only of directed
     * arcs will make the function return TRUE. */

    for (i = 0; i < nrows; i++)
      for (j = 0; j < nrows; j++)
        if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1))
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

  }/*THEN*/

  /* initialize the status, {row,col}sums and crossprod arrays. */
  status = allocstatus(nrows);
  rowsums = alloc1dcont(nrows);
  colsums = alloc1dcont(nrows);
  crossprod = alloc1dcont(nrows);

  if (*debuglevel > 0)
    Rprintf("* checking whether the partially directed graph is acyclic.\n");

  /* even in the worst case scenario at least two nodes are marked as
   * good at each iteration, so even ceil(nrows/2) iterations should be
   * enough. */
  for (z = 0; z < nrows; z++) {

start:

    if (*debuglevel > 0)
      Rprintf("* beginning iteration %d.\n", z + 1);

    for (i = 0; i < nrows; i++) {

      /* skip known-good nodes. */
      if (status[i] == GOOD) continue;

      /* reset and update row and column totals. */
      rowsums[i] = colsums[i] = crossprod[i] = 0;

      /* compute row and column totals for the i-th node. */
      for (j = 0; j < nrows; j++) {

        rowsums[i] += a[CMC(i, j, nrows)];
        colsums[i] += a[CMC(j, i, nrows)];
        crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)];

      }/*FOR*/

there:

      if (*debuglevel > 0)
        Rprintf("  > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n",
          NODE(i), rowsums[i], colsums[i], crossprod[i]);

      /* if either total is zero, the node is either a root node or a
       * leaf node, and is not part of any cycle. */
      if (((rowsums[i] == 0) || (colsums[i] == 0)) ||
          ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) {

        if (*debuglevel > 0)
          Rprintf("  @ node %s is cannot be part of a cycle.\n", NODE(i));

        /* update the adjacency matrix and the row/column totals. */
        for (j = 0; j < nrows; j++)
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

        rowsums[i] = colsums[i] = crossprod[i] = 0;

        /* mark the node as good. */
        status[i] = GOOD;
        check_status--;

      }/*THEN*/
      else if (crossprod[i] == 1) {

        /* find the other of the undirected arc. */
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1)
            break;

        /* safety check, just in case. */
        if (i == j) continue;

        if (((colsums[i] == 1) && (colsums[j] == 1)) ||
            ((rowsums[i] == 1) && (rowsums[j] == 1))) {

          if (*debuglevel > 0)
            Rprintf("  @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j));

          /* update the adjacency matrix and the row/column totals. */
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;
          crossprod[i] = 0;
          rowsums[i]--;
          colsums[i]--;
          rowsums[j]--;
          colsums[j]--;

          /* jump back to the first check; if either the row or column total
           * was equal to 1 only because of the undirected arc, the node can
           * now be marked as good. */
          if ((rowsums[i] == 0) || (colsums[i] == 0))
            goto there;

        }/*THEN*/

      }/*THEN*/

    }/*FOR*/

    /* at least three nodes are needed to have a cycle. */
    if (check_status < 3) {

      if (*debuglevel > 0)
        Rprintf("@ at least three nodes are needed to have a cycle.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/

    /* if there are three or more bad nodes and there was no change in
     * the last iteration, the algorithm is stuck on a cycle. */
    if (check_status_old == check_status) {

      if (*debuglevel > 0)
        Rprintf("@ no change in the last iteration.\n");

      /* give up and call c_has_path() to kill some undirected arcs. */
      for (i = 0; i < nrows; i++)
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) {

            /* remove the arc from the adjacency matrix while testing it,
             * there's a path is always found (the arc itself). */
            a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

            if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) &&
               !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) {

              if (*debuglevel > 0)
                Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j));

              /* increase the iteration counter and start again. */
              z++;
              goto start;

            }/*THEN*/
            else {

              /* at least one cycle is really present; give up and return.  */
              UNPROTECT(1);
              return build_return_array(nodes, status, nrows, check_status, return_nodes);

            }/*ELSE*/

          }/*THEN*/

      /* give up if there are no undirected arcs, cycles composed
       * entirely by directed arcs are never false positives. */
      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/
    else {

      check_status_old = check_status;

    }/*ELSE*/

  }/*FOR*/

  UNPROTECT(1);
  return build_return_array(nodes, status, nrows, check_status, return_nodes);

}/*IS_PDAG_ACYCLIC*/
Ejemplo n.º 21
0
/* remove duplicate arcs from the arc set. */
SEXP unique_arcs(SEXP arcs, SEXP nodes, SEXP warn) {

  return c_unique_arcs(arcs, nodes, isTRUE(warn));

}/*UNIQUE_ARCS*/
Ejemplo n.º 22
0
/* predict the values of one or more variables given one or more variables by
 * maximum a posteriori (MAP). */
SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) {

int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0;
int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug);
void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL;
SEXP result, colnames, evidence, evmatch, temp = R_NilValue;
SEXP cpdist, predicted, lvls = R_NilValue;
double *wgt = NULL;
long double *lvls_counts = NULL;

  /* extract the names of the variables in the data. */
  colnames = getAttrib(data, R_NamesSymbol);

  /* remove the name of the variable to predict. */
  nev = length(from);
  PROTECT(evmatch = match(colnames, from, 0));

  /* cache variable types and pointers. */
  vartypes = alloc1dcont(nev);
  varptrs = alloc1dpointer(nev);
  for (j = 0, k = 0; j < nev; j++) {

    temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1);
    vartypes[k] = TYPEOF(temp);
    varptrs[k++] = DATAPTR(temp);

  }/*FOR*/

  /* cache the sample size. */
  nobs = length(temp);

  /* allocate a list to hold the evidence. */
  PROTECT(evidence = allocVector(VECSXP, nev));
  setAttrib(evidence, R_NamesSymbol, from);

  /* cache pointers to the elements of the evidence .*/
  evptrs = alloc1dpointer(nev);

  for (j = 0; j < nev; j++) {

    PROTECT(temp = allocVector(vartypes[j], 1));
    evptrs[j] = DATAPTR(temp);
    SET_VECTOR_ELT(evidence, j, temp);
    UNPROTECT(1);

  }/*FOR*/

  /* make the evidence a data frame to compact debugging output. */
  minimal_data_frame(evidence);

  /* allocate the return value. */
  PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs));
  res = DATAPTR(result);

  /* in the case of discrete variables, allocate scratch space for levels'
   * frequencies. */
  if (TYPEOF(result) == INTSXP) {

    lvls = getAttrib(result, R_LevelsSymbol);
    nlvls = length(lvls);
    lvls_counts = allocldouble(nlvls);

  }/*THEN*/

  /* allocate the weights. */
  wgt = alloc1dreal(nsims);

  /* allocate sratch space for the random samplings. */
  PROTECT(cpdist = fit2df(fitted, nsims));
  predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0)));
  pred = DATAPTR(predicted);

  /* iterate over the observations. */
  for (i = 0; i < nobs; i++) {

    /* copy the values into the list. */
    for (j = 0; j < nev; j++) {

      switch(vartypes[j]) {

        case REALSXP:

          *((double *)evptrs[j]) = ((double *)varptrs[j])[i];
          break;

        case INTSXP:

          *((int *)evptrs[j]) = ((int *)varptrs[j])[i];
          break;

      }/*SWITCH*/

    }/*FOR*/

    if (debuglevel > 0) {

      Rprintf("* predicting observation %d conditional on:\n", i);
      PrintValue(evidence);

    }/*THEN*/

    /* generate samples from the conditional posterior distribution. */
    c_rbn_master(fitted, cpdist, n, evidence, FALSE);
    /* compute the weights. */
    c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE);

    /* compute the posterior estimate. */
    switch(TYPEOF(predicted)) {

      case REALSXP:

        /* average the predicted values. */
        ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims,
                               debuglevel);
        break;

      case INTSXP:

        /* pick the most frequent value. */
        ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts,
                            lvls, nlvls, debuglevel);
        break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(4);

  return result;

}/*MAPPRED*/
Ejemplo n.º 23
0
SEXP score_cache_fill(SEXP nodes, SEXP data, SEXP network, SEXP score,
    SEXP extra, SEXP reference, SEXP equivalence, SEXP decomposability,
    SEXP updated, SEXP amat, SEXP cache, SEXP blmat, SEXP debug) {

int *colsum = NULL, nnodes = length(nodes), lupd = length(updated);
int *a = NULL, *upd = NULL, *b = NULL, debuglevel = isTRUE(debug);
int i = 0, j = 0, k = 0;
double *cache_value = NULL;
SEXP arc, delta, op, temp;

  /* save a pointer to the adjacency matrix, the blacklist and the
   * updated nodes. */
  a = INTEGER(amat);
  b = INTEGER(blmat);
  upd = INTEGER(updated);

  /* if there are no nodes to update, return. */
  if (lupd == 0) return cache;

  /* set up row and column total to check for score equivalence;
   * zero means no parent nodes. */
  if (isTRUE(equivalence)) {

    colsum = Calloc1D(nnodes, sizeof(int));

    for (i = 0; i < nnodes; i++)
      for (j = 0; j < nnodes; j++)
        colsum[j] += a[CMC(i, j, nnodes)];

  }/*THEN*/

  /* allocate and initialize the cache. */
  cache_value = REAL(cache);

  /* allocate a two-slot character vector. */
  PROTECT(arc = allocVector(STRSXP, 2));

  /* allocate and initialize the fake score delta. */
  PROTECT(delta = ScalarReal(0));

  /* allocate and initialize the score.delta() operator. */
  PROTECT(op = mkString("set"));

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

       /* incident nodes must be different from each other. */
       if (i == j) continue;

       /* if only one or two nodes' caches need updating, skip the rest. */
       for (k = 0; k < lupd; k++)
         if (upd[k] == j)
           goto there;

       continue;

there:

       /* no need to compute the score delta for blacklisted arcs. */
       if (b[CMC(i, j, nnodes)] == 1)
         continue;

       /* use score equivalence if possible to check only one orientation. */
       if (isTRUE(equivalence)) {

         /* if the following conditions are met, look up the score delta of
          * the reverse of the current arc:
          *   1) that score delta has already been computed.
          *   2) both incident nodes have no parent, so the arc is really
          *      score equivalent (no v-structures).
          *   3) the reversed arc has not been blacklisted, as the score delta
          *      is not computed in this case. */
         if ((i > j) && (colsum[i] + colsum[j] == 0) && (b[CMC(j, i, nnodes)] == 0)) {

           cache_value[CMC(i, j, nnodes)] = cache_value[CMC(j, i, nnodes)];
           continue;

         }/*THEN*/

       }/*THEN*/

       /* save the nodes incident on the arc. */
       SET_STRING_ELT(arc, 0, STRING_ELT(nodes, i));
       SET_STRING_ELT(arc, 1, STRING_ELT(nodes, j));

       /* if the arc is not present in the graph it should be added;
        * otherwise it should be removed. */
       if (a[CMC(i, j, nnodes)] == 0)
         SET_STRING_ELT(op, 0, mkChar("set"));
       else
         SET_STRING_ELT(op, 0, mkChar("drop"));

       /* checkpoint allocated memory. */
       /* evaluate the call to score.delta() for the arc. */
       PROTECT(temp = score_delta(arc, network, data, score, delta, reference,
         op, extra, decomposability));

       cache_value[CMC(i, j, nnodes)] = NUM(VECTOR_ELT(temp, 1));
       UNPROTECT(1);

       if (debuglevel > 0)
         Rprintf("* caching score delta for arc %s -> %s (%lf).\n",
           CHAR(STRING_ELT(nodes, i)), CHAR(STRING_ELT(nodes, j)),
            cache_value[CMC(i, j, nnodes)]);

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(3);

  if (isTRUE(equivalence))
    Free1D(colsum);

  return cache;

}/*HC_CACHE_FILL*/
Ejemplo n.º 24
0
/* convert an arc set to a weighted edge list. */
SEXP arcs2welist(SEXP arcs, SEXP nodes, SEXP weights, SEXP nid, SEXP sublist,
    SEXP parents) {

int i = 0, j = 0, k = 0, nnodes = length(nodes), narcs = length(arcs)/2;
int *e = NULL, *coords = NULL, *adjacent = NULL;
int num_id = isTRUE(nid), sub = isTRUE(sublist), up = isTRUE(parents);
double *w = REAL(weights), *ew = NULL;
SEXP try, elist, edges, edge_weights, temp, temp_name = R_NilValue;

  /* allocate the return value. */
  PROTECT(elist = allocVector(VECSXP, nnodes));
  /* set the node names. */
  setAttrib(elist, R_NamesSymbol, nodes);

  /* allocate and initialize the subset name. */
  if (sub > 0)
    PROTECT(temp_name = mkStringVec(2, "edges", "weight"));

  /* allocate the scratch space to keep track adjacent nodes. */
  adjacent = alloc1dcont(nnodes);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  for (i = 0; i < narcs; i++)
    adjacent[coords[i + up * narcs] - 1]++;

  for (i = 0; i < nnodes; i++) {

    /* allocate and set up the edge array. */
    if (num_id > 0) {

      PROTECT(edges = allocVector(INTSXP, adjacent[i]));
      e = INTEGER(edges);

    }/*THEN*/
    else {

      PROTECT(edges = allocVector(STRSXP, adjacent[i]));

    }/*ELSE*/

    /* allocate and set up the weights array. */
    PROTECT(edge_weights = allocVector(REALSXP, adjacent[i]));
    ew = REAL(edge_weights);

    /* copy the coordinates or the labels of adjacent nodes. */
    for (j = 0, k = 0; j < narcs; j++) {

      if (coords[j + up * narcs] != i + 1)
        continue;

      /* copy the weight as well. */
      ew[k] = w[j];

      if (num_id > 0)
        e[k++] = coords[(1 - up) * narcs + j];
      else
        SET_STRING_ELT(edges, k++, STRING_ELT(arcs, (1 - up) * narcs + j));

      if (k == adjacent[i])
        break;

    }/*FOR*/

    if (sub > 0) {

      /* allocate and set up the "edge" sublist for graphNEL. */
      PROTECT(temp = allocVector(VECSXP, 2));
      setAttrib(temp, R_NamesSymbol, temp_name);
      SET_VECTOR_ELT(temp, 0, edges);
      SET_VECTOR_ELT(temp, 1, edge_weights);
      SET_VECTOR_ELT(elist, i, temp);
      UNPROTECT(1);

    }/*THEN*/
    else {

      /* save weights with edges as names. */
      setAttrib(edge_weights, R_NamesSymbol, edges);
      SET_VECTOR_ELT(elist, i, edge_weights);

    }/*ELSE*/

    UNPROTECT(2);

  }/*FOR*/

  if (sub > 0)
    UNPROTECT(3);
  else
    UNPROTECT(2);

  return elist;

}/*ARCS2WELIST*/
Ejemplo n.º 25
0
/* does "arc" match any elements of "set"? */
SEXP is_listed(SEXP arc, SEXP set, SEXP either, SEXP both, SEXP debug) {

    int i = 0, matched = 0, nrows = length(set) / 2;
    const char *from = CHAR(STRING_ELT(arc, 0));
    const char *to = CHAR(STRING_ELT(arc, 1));
    int debuglevel = isTRUE(debug);

    /* if the arc set is NULL, return immediately. */
    if (isNull(set))
        return ScalarLogical(FALSE);

    for (i = 0; i < nrows; i++) {

        if (debuglevel > 0)
            Rprintf("* checking %s -> %s\n", ARC(i, 0), ARC(i, 1));

        /* check the first element; if it does not match skip to the second one. */
        if (!strcmp(from, ARC(i, 0)) ) {

            /* if the second element matches, return if "both = FALSE" or if
             * "both = TRUE" and the reversed arc has been already found out. */
            if (!strcmp(to, ARC(i, 1)) ) {

                /* increment the "matched" counter, which is needed to be sure both
                 * A -> B and B -> A are in the arc set when "both = TRUE". */
                matched++;

                if (debuglevel > 0)
                    Rprintf("  > matched %s -> %s (matched is %d).\n", ARC(i, 0),
                            ARC(i, 1), matched);

                /* return TRUE if one of the following conditions is met:
                 *
                 * 1) exact match (either = both = FALSE).
                 * 2) match regardless of direction (either = TRUE).
                 * 3) match both directions (both = TRUE) when the other
                 *      one has already been found (matched = 2).
                 */
                if ((!isTRUE(either) && !isTRUE(both)) || isTRUE(either) ||
                        ((matched == 2) && isTRUE(both)))
                    goto success;

            }/*THEN*/

        }/*THEN*/
        else if (isTRUE(either) || isTRUE(both)) {

            /* the same as above, but with the reversed arc; this part is
             * skipped if "both = FALSE" and "either = FALSE", since the
             * reversed arc should not be matched in that case. */

            if (!strcmp(to, ARC(i, 0)) ) {

                if (!strcmp(from, ARC(i, 1)) ) {

                    /* increment the "matched" counter, which is needed to be sure both
                     * A -> B and B -> A are in the arc set when "both = TRUE". */
                    matched++;

                    if (debuglevel > 0)
                        Rprintf("  > matched %s -> %s (matched is %d).\n", ARC(i, 0),
                                ARC(i, 1), matched);

                    /* return TRUE if one of the following conditions is met:
                     *
                     * 1) match regardless of direction (either = TRUE).
                     * 2) match both directions (both = TRUE) when the other
                     *      one has already been found (matched = 2).
                     */
                    if (isTRUE(either) || ((matched == 2) && isTRUE(both)))
                        goto success;

                }/*THEN*/

            }/*THEN */

        }/*THEN*/

    }/*FOR*/

    return ScalarLogical(FALSE);

success:
    return ScalarLogical(TRUE);

}/*IS_LISTED*/
Ejemplo n.º 26
0
Archivo: dedup.c Proyecto: cran/bnlearn
/* remove one variable in each highly-correlated pair. */
SEXP dedup (SEXP data, SEXP threshold, SEXP complete, SEXP debug) {

int i = 0, j = 0, k = 0, dropped = 0, nc = 0;
int debuglevel = isTRUE(debug);
double *mean = NULL, *sse = NULL, *xx = NULL, *yy = NULL;
double cur_mean[2], cur_sse[2];
double tol = MACHINE_TOL, t = NUM(threshold);
long double sum = 0;
SEXP result, colnames;
gdata dt = { 0 };

  /* extract the columns from the data frame. */
  dt = gdata_from_SEXP(data, 0);
  meta_init_flags(&(dt.m), 0, complete, R_NilValue);
  meta_copy_names(&(dt.m), 0, data);
  /* set up the vectors for the pairwise complete observations. */
  xx = Calloc1D(dt.m.nobs, sizeof(double));
  yy = Calloc1D(dt.m.nobs, sizeof(double));

  if (debuglevel > 0)
    Rprintf("* caching means and variances.\n");

  mean = Calloc1D(dt.m.ncols, sizeof(double));
  sse = Calloc1D(dt.m.ncols, sizeof(double));

  /* cache the mean and variance of complete variables. */
  for (j = 0; j < dt.m.ncols; j++) {

    if (!dt.m.flag[j].complete)
      continue;

    mean[j] = c_mean(dt.col[j], dt.m.nobs);
    sse[j] = c_sse(dt.col[j], mean[j], dt.m.nobs);

  }/*FOR*/

  /* main loop. */
  for (j = 0; j < dt.m.ncols - 1; j++) {

    /* skip variables already flagged for removal. */
    if (dt.m.flag[j].drop)
      continue;

    if (debuglevel > 0)
      Rprintf("* looking at %s with %d variables still to check.\n",
        dt.m.names[j], dt.m.ncols - (j + 1));

    for (k = j + 1; k < dt.m.ncols; k++) {

      /* skip variables already flagged for removal. */
      if (dt.m.flag[k].drop)
        continue;

      if (dt.m.flag[j].complete && dt.m.flag[k].complete) {

        /* use the cached means and variances. */
        cur_mean[0] = mean[j];
        cur_mean[1] = mean[k];
        cur_sse[0] = sse[j];
        cur_sse[1] = sse[k];

        /* compute the covariance. */
        for (i = 0, sum = 0; i < dt.m.nobs; i++)
          sum += (dt.col[j][i] - cur_mean[0]) * (dt.col[k][i] - cur_mean[1]);

      }/*THEN*/
      else {

        for (i = 0, nc = 0; i < dt.m.nobs; i++) {

          if (ISNAN(dt.col[j][i]) || ISNAN(dt.col[k][i]))
            continue;

          xx[nc] = dt.col[j][i];
          yy[nc++] = dt.col[k][i];

        }/*FOR*/


        /* if there are no complete observations, take the variables to be
         * independent. */
        if (nc == 0)
          continue;

        cur_mean[0] = c_mean(xx, nc);
        cur_mean[1] = c_mean(yy, nc);
        cur_sse[0] = c_sse(xx, cur_mean[0], nc);
        cur_sse[1] = c_sse(yy, cur_mean[1], nc);

        /* compute the covariance. */
        for (i = 0, sum = 0; i < nc; i++)
          sum += (xx[i] - cur_mean[0]) * (yy[i] - cur_mean[1]);

      }/*ELSE*/

      /* safety check against "divide by zero" errors. */
      if ((cur_sse[0] < tol) || (cur_sse[1] < tol))
        sum = 0;
      else
        sum /= sqrt(cur_sse[0] * cur_sse[1]);

      /* test the correlation against the threshold. */
      if (fabsl(sum) > t) {

        if (debuglevel > 0)
          Rprintf("%s is collinear with %s, dropping %s with COR = %.4Lf\n",
            dt.m.names[j], dt.m.names[k], dt.m.names[k], sum);

        /* flag the variable for removal. */
        dt.m.flag[k].drop = TRUE;
        dropped++;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* set up the return value. */
  PROTECT(result = allocVector(VECSXP, dt.m.ncols - dropped));
  PROTECT(colnames = allocVector(STRSXP, dt.m.ncols - dropped));

  for (j = 0, k = 0; j < dt.m.ncols; j++)
    if (!dt.m.flag[j].drop) {

      SET_STRING_ELT(colnames, k, mkChar(dt.m.names[j]));
      SET_VECTOR_ELT(result, k++, VECTOR_ELT(data, j));

    }/*THEN*/

  setAttrib(result, R_NamesSymbol, colnames);

  /* make it a data frame. */
  minimal_data_frame(result);

  Free1D(mean);
  Free1D(sse);
  Free1D(xx);
  Free1D(yy);
  FreeGDT(dt, FALSE);

  UNPROTECT(2);

  return result;

}/*DEDUP*/
Ejemplo n.º 27
0
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */
SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference,
    SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) {

int nnodes = length(nodes), i = 0, j = 0;
int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug);
int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL;
double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL;
double *mp = REAL(maxp), *np = REAL(nparents);
SEXP bestop;

  /* allocate and initialize the return value (use FALSE as a canary value). */
  PROTECT(bestop = allocVector(VECSXP, 3));
  setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to"));

  /* allocate and initialize a dummy FALSE object. */
  SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE));

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(nnodes, sizeof(int));
  scratch = Calloc1D(nnodes, sizeof(int));

  /* save pointers to the numeric/integer matrices. */
  cache_value = REAL(cache);
  ad = INTEGER(added);
  am = INTEGER(amat);
  w = INTEGER(wlmat);
  b = INTEGER(blmat);

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0; i < nnodes * nnodes; i++)
       counter += ad[i];

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to add one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (ad[CMC(i, j, nnodes)] == 0)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to add %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      /* this score delta is the best one at the moment, so add the arc if it
       * does not introduce cycles in the graph. */
      if (temp - max > tol) {

        if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not adding, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ adding %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "set", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes * nnodes; i++)
       counter += am[i] * (1 - w[i]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to remove one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* whitelisted arcs are not to be removed, ever. */
      if (w[CMC(i, j, nnodes)] == 1)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to remove %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (debuglevel > 0)
          Rprintf("    @ removing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "drop", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes; i++)
       for (j = 0; j < nnodes; j++)
         counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to reverse one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* don't reverse an arc if the one in the opposite direction is
       * blacklisted, ever. */
      if (b[CMC(j, i, nnodes)] == 1)
        continue;

      /* do not reverse an arc if that means violating the limit on the
       * maximum number of parents. */
      if (np[i] >= *mp)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)];
      /* nuke small values and negative zeroes. */
      if (fabs(temp) < tol) temp = 0;

      if (debuglevel > 0) {

        Rprintf("  > trying to reverse %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not reversing, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ reversing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "reverse", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;
        /* both nodes' reference scores must be updated. */
        update = 2;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* update the reference scores. */
  REAL(reference)[to] += cache_value[CMC(from, to, nnodes)];
  if (update == 2)
    REAL(reference)[from] += cache_value[CMC(to, from, nnodes)];

  Free1D(path);
  Free1D(scratch);

  UNPROTECT(1);

  return bestop;

}/*HC_OPT_STEP*/
Ejemplo n.º 28
0
/* find out the partial ordering of the nodes of a DAG. */
SEXP topological_ordering(SEXP bn, SEXP root_nodes, SEXP reverse, SEXP debug) {

int *depth = NULL, *matched = NULL, debuglevel = isTRUE(debug);
int d = 0, i = 0, j = 0, changed = 0, nnodes = 0;
char *direction = NULL;
SEXP nodes_data, nodes, try, children, ordering;

  if (isTRUE(reverse))
    direction = "parents";
  else
    direction = "children";

  /* get to the nodes' data in both 'bn' and 'bn.fit' objects. */
  nodes_data = getListElement(bn, "nodes");

  if (isNull(nodes_data))
    nodes_data = bn;

  /* get and count the node labels. */
  PROTECT(nodes = getAttrib(nodes_data, R_NamesSymbol));
  nnodes = length(nodes);

  /* allocate a status vector to trak the ordering of the nodes. */
  PROTECT(ordering = allocVector(INTSXP, nnodes));
  depth = INTEGER(ordering);
  memset(depth, '\0', nnodes * sizeof(int));

  if (debuglevel > 0)
    Rprintf("* currently at depth 1 (starting BFS).\n");

  /* set the root nodes as the starting point of the BFS. */
  PROTECT(try = match(nodes, root_nodes, 0));
  matched = INTEGER(try);

  for (i = 0; i < length(try); i++) {

    if (debuglevel > 0)
      Rprintf("  > got node %s.\n", NODE(matched[i] - 1));

    depth[matched[i] - 1] = 1;

  }/*FOR*/

  UNPROTECT(1);

  /* now let's go down from the roots to the leafs, one layer at a time. */
  for (d = 1; d <= nnodes; d++) {

    if (debuglevel > 0)
      Rprintf("* currently at depth %d.\n", d + 1);

    /* reset the changed flag. */
    changed = 0;

    for (i = 0; i < nnodes; i++) {

      /* this node has already been visisted, skip. */
      if (depth[i] < d)
        continue;

      children = getListElement(VECTOR_ELT(nodes_data, i), direction);

      /* this node is a leaf, nothing to do, move along. */
      if (length(children) == 0)
        continue;

      /* set the changed flag. */
      changed = 1;

      PROTECT(try = match(nodes, children, 0));
      matched = INTEGER(try);

      /* set the correct depth to the children of this node. */
      for (j = 0; j < length(try); j++) {

        if (debuglevel > 0)
          Rprintf("  > got node %s from %s.\n",
            NODE(matched[j] - 1), NODE(i));

        depth[matched[j] - 1] = d + 1;

      }/*FOR*/

      UNPROTECT(1);

    }/*FOR*/

    /* all nodes have been visited, break. */
    if (!changed) break;

  }/*FOR*/

  if (debuglevel > 0)
    Rprintf("* all nodes have been scheduled.\n");

  /* add the node labels to the return value. */
  setAttrib(ordering, R_NamesSymbol, nodes);

  UNPROTECT(2);

  return ordering;

}/*TOPOLOGICAL_ORDERING*/
Ejemplo n.º 29
0
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children,
    int debuglevel) {

int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0;
int nbeta = length(VECTOR_ELT(beta, 0));
int *temp = NULL, *aid = INTEGER(VECTOR_ELT(beta, 2));
double prior = 0, result = 0;
double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3));
short int *adjacent = NULL;
SEXP nodes, try;

  /* get the node labels. */
  nodes = getAttrib(beta, BN_NodesSymbol);
  nnodes = length(nodes);

  /* match the target node. */
  PROTECT(try = match(nodes, target, 0));
  t = INT(try);
  UNPROTECT(1);

  /* find out which nodes are parents and which nodes are children. */
  adjacent = allocstatus(nnodes);

  PROTECT(try = match(nodes, parents, 0));
  temp = INTEGER(try);
  for (i = 0; i < length(try); i++)
    adjacent[temp[i] - 1] = PARENT;
  UNPROTECT(1);

  PROTECT(try = match(nodes, children, 0));
  temp = INTEGER(try);
  for (i = 0; i < length(try); i++)
    adjacent[temp[i] - 1] = CHILD;
  UNPROTECT(1);

  /* prior probabilities table lookup. */
  for (i = t + 1; i <= nnodes; i++) {

    /* compute the arc id. */
    cur_arc = UPTRI3(t, i, nnodes);

    /* look up the prior probability. */
    for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) {

      /* arcs are ordered, so we can stop early in the lookup. */
      if (aid[k] > cur_arc)
        break;

      if (aid[k] == cur_arc) {

        switch(adjacent[i - 1]) {

          case PARENT:
            prior = bkwd[k];
            break;
          case CHILD:
            prior = fwd[k];
            break;
          default:
            prior = fmax2(0, 1 - bkwd[k] - fwd[k]);

        }/*SWITCH*/

        break;

      }/*THEN*/

    }/*FOR*/

    if (debuglevel > 0) {

      switch(adjacent[i - 1]) {

        case PARENT:
          Rprintf("  > found arc %s -> %s, prior pobability is %lf.\n",
            NODE(i - 1), NODE(t - 1), prior);
          break;
        case CHILD:
          Rprintf("  > found arc %s -> %s, prior probability is %lf.\n",
            NODE(t - 1), NODE(i - 1), prior);
          break;
        default:
          Rprintf("  > no arc between %s and %s, prior probability is %lf.\n",
            NODE(t - 1), NODE(i - 1), prior);

      }/*SWITCH*/

    }/*THEN*/

    /* move to log-scale and divide by the non-informative log(1/3), so that
     * the contribution of each arc whose prior has not been specified by the
     * user is zero; overflow is likely otherwise. */
    result += log(prior / ((double)1/3));

  }/*FOR*/

  return result;

}/*CASTELO_PRIOR*/

/* complete a prior as per Castelo & Siebes. */
SEXP castelo_completion(SEXP prior, SEXP nodes, SEXP learning)  {

int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = length(nodes);
int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL;
double *d1 = NULL, *d2 = NULL, *p = NULL;
SEXP df, arc_id, undirected, a1, a2, match1, match2, prob;
SEXP result, from, to, nid, dir1, dir2;

  /* compute numeric IDs for the arcs. */
  a1 = VECTOR_ELT(prior, 0);
  a2 = VECTOR_ELT(prior, 1);
  narcs1 = length(a1);
  PROTECT(match1 = match(nodes, a1, 0));
  PROTECT(match2 = match(nodes, a2, 0));
  m1 = INTEGER(match1);
  m2 = INTEGER(match2);
  PROTECT(arc_id = allocVector(INTSXP, narcs1));
  aid = INTEGER(arc_id);

  c_arc_hash(narcs1, nnodes, m1, m2, aid, NULL, FALSE);

  /* duplicates correspond to undirected arcs. */
  PROTECT(undirected = dupe(arc_id));
  und = INTEGER(undirected);

  /* extract the components from the prior. */
  prob = VECTOR_ELT(prior, 2);
  p = REAL(prob);

  /* count output arcs. */
  for (i = 0; i < narcs1; i++)
    narcs2 += 2 - und[i];
  narcs2 /= 2;

  /* allocate the columns of the return value. */
  PROTECT(from = allocVector(STRSXP, narcs2));
  PROTECT(to = allocVector(STRSXP, narcs2));
  PROTECT(nid = allocVector(INTSXP, narcs2));
  id = INTEGER(nid);
  PROTECT(dir1 = allocVector(REALSXP, narcs2));
  d1 = REAL(dir1);
  PROTECT(dir2 = allocVector(REALSXP, narcs2));
  d2 = REAL(dir2);

  /* sort the strength coefficients. */
  poset = alloc1dcont(narcs1);
  for (k = 0; k < narcs1; k++)
    poset[k] = k;
  R_qsort_int_I(aid, poset, 1, narcs1);

  for (i = 0, k = 0; i < narcs1; i++) {

    cur = poset[i];

#define ASSIGN(A1, A2, D1, D2) \
  SET_STRING_ELT(from,  k, STRING_ELT(A1, cur)); \
  SET_STRING_ELT(to,  k, STRING_ELT(A2, cur)); \
  id[k] = aid[i]; \
  D1[k] = p[cur]; \
  if ((und[cur] == TRUE) && (i < narcs1 - 1)) \
    D2[k] = p[poset[++i]]; \
  else \
    D2[k] = (1 - D1[k])/2;

    /* copy the node labels. */
    if (m1[cur] < m2[cur]) {

      ASSIGN(a1, a2, d1, d2);

    }/*THEN*/
    else {

      ASSIGN(a2, a1, d2, d1);

    }/*ELSE*/

    /* check the probabilities do not exceed 1; fail only for large errors. */
    if (d1[k] + d2[k] > 1) {

      if (d1[k] + d2[k] < 1 + 2 * MACHINE_TOL) {

        d1[k] = d1[k] / (d1[k] + d2[k]);
        d2[k] = d2[k] / (d1[k] + d2[k]);

      }/*THEN*/
      else {

        UNPROTECT(9);

        error("the probabilities for arc %s -> %s sum to %lf.",
          CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]);

      }/*ELSE*/

    }/*THEN*/

    /* bound the probability of not including an arc away from zero, structure
     * learning otherwise fails when starting from the empty graph and gets
     * stuck very easily in general. */
    if (isTRUE(learning) && (fabs(1 - d1[k] - d2[k]) < MACHINE_TOL)) {

      d1[k] = d1[k] - MACHINE_TOL;
      d2[k] = d2[k] - MACHINE_TOL;

    }/*THEN*/

    /* move to the next arc. */
    k++;

  }/*FOR*/

  /* set up the return value. */
  PROTECT(result = allocVector(VECSXP, 5));
  SET_VECTOR_ELT(result, 0, from);
  SET_VECTOR_ELT(result, 1, to);
  SET_VECTOR_ELT(result, 2, nid);
  SET_VECTOR_ELT(result, 3, dir1);
  SET_VECTOR_ELT(result, 4, dir2);
  setAttrib(result, R_NamesSymbol,
    mkStringVec(5, "from", "to", "aid", "fwd", "bkwd"));
  PROTECT(df = minimal_data_frame(result));

  UNPROTECT(11);

  return df;

}/*CASTELO_COMPLETION*/