예제 #1
0
SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) {

int k = 0, from = 0, to = 0, nrows = length(arcs) / 2, dims = length(nodes);
int *a = NULL, *coords = NULL, *poset = NULL, *path = NULL, *scratch = NULL;
double *w = NULL;
SEXP weights2, amat, try, acyclic;

  /* allocate and initialize the adjacency matrix. */
  PROTECT(amat = allocMatrix(INTSXP, dims, dims));
  a = INTEGER(amat);
  memset(a, '\0', sizeof(int) * dims * dims);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  /* duplicate the weights to preserve the original ones. */
  PROTECT(weights2 = duplicate(weights));
  w = REAL(weights2);

  /* sort the strength coefficients. */
  poset = Calloc1D(nrows, sizeof(int));
  for (k = 0; k < nrows; k++)
    poset[k] = k;
  R_qsort_I(w, poset, 1, nrows);

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(dims, sizeof(int));
  scratch = Calloc1D(dims, sizeof(int));

  /* iterate over the arcs in reverse order wrt their strength coefficients. */
  for (k = 0; k < nrows; k++) {

    from = coords[poset[k]] - 1;
    to = coords[poset[k] + nrows] - 1;

    /* add an arc only if it does not introduce cycles. */
    if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, path, scratch, FALSE))
      a[CMC(from, to, dims)] = 1;
    else
      warning("arc %s -> %s would introduce cycles in the graph, ignoring.",
        NODE(from), NODE(to));

  }/*FOR*/

  /* convert the adjacency matrix back to an arc set and return it. */
  acyclic = amat2arcs(amat, nodes);

  Free1D(path);
  Free1D(scratch);
  Free1D(poset);

  UNPROTECT(3);

  return acyclic;

}/*SMART_NETWORK_AVERAGING*/
예제 #2
0
파일: path.c 프로젝트: aogbechie/DBN
SEXP has_pdag_path(SEXP from, SEXP to, SEXP amat, SEXP nrows, SEXP nodes,
    SEXP underlying, SEXP exclude_direct, SEXP debug) {

int start = INT(from) - 1, stop = INT(to) - 1, n = INT(nrows);
int debuglevel = LOGICAL(debug)[0], notdirect = LOGICAL(exclude_direct)[0],
      ugraph = LOGICAL(underlying)[0], *a = INTEGER(amat);

  return ScalarLogical(c_has_path(start, stop, a, n, nodes, ugraph, notdirect,
           debuglevel));

}/*HAS_DAG_PATH*/
예제 #3
0
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, 
    SEXP directed, SEXP debug) {

int i = 0, j = 0, z = 0;
int nrows = LENGTH(nodes);
int check_status = nrows, check_status_old = nrows;
int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL;
int *debuglevel = NULL;
short int *status = NULL;
SEXP amat;

  /* dereference the debug parameter. */
  debuglevel = LOGICAL(debug);

  /* build the adjacency matrix from the arc set.  */
  if (*debuglevel > 0)
    Rprintf("* building the adjacency matrix.\n");

  PROTECT(amat = arcs2amat(arcs, nodes));
  a = INTEGER(amat);

  /* should we consider only directed arcs? */
  if (isTRUE(directed)) {

    /* removing undirected arcs, so that only cycles made only of directed
     * arcs will make the function return TRUE. */

    for (i = 0; i < nrows; i++)
      for (j = 0; j < nrows; j++)
        if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1))
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

  }/*THEN*/

  /* initialize the status, {row,col}sums and crossprod arrays. */
  status = allocstatus(nrows);
  rowsums = alloc1dcont(nrows);
  colsums = alloc1dcont(nrows);
  crossprod = alloc1dcont(nrows);

  if (*debuglevel > 0)
    Rprintf("* checking whether the partially directed graph is acyclic.\n");

  /* even in the worst case scenario at least two nodes are marked as
   * good at each iteration, so even ceil(nrows/2) iterations should be
   * enough. */
  for (z = 0; z < nrows; z++) {

start:

    if (*debuglevel > 0)
      Rprintf("* beginning iteration %d.\n", z + 1);

    for (i = 0; i < nrows; i++) {

      /* skip known-good nodes. */
      if (status[i] == GOOD) continue;

      /* reset and update row and column totals. */
      rowsums[i] = colsums[i] = crossprod[i] = 0;

      /* compute row and column totals for the i-th node. */
      for (j = 0; j < nrows; j++) {

        rowsums[i] += a[CMC(i, j, nrows)];
        colsums[i] += a[CMC(j, i, nrows)];
        crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)];

      }/*FOR*/

there:

      if (*debuglevel > 0)
        Rprintf("  > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n",
          NODE(i), rowsums[i], colsums[i], crossprod[i]);

      /* if either total is zero, the node is either a root node or a
       * leaf node, and is not part of any cycle. */
      if (((rowsums[i] == 0) || (colsums[i] == 0)) ||
          ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) {

        if (*debuglevel > 0)
          Rprintf("  @ node %s is cannot be part of a cycle.\n", NODE(i));

        /* update the adjacency matrix and the row/column totals. */
        for (j = 0; j < nrows; j++)
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

        rowsums[i] = colsums[i] = crossprod[i] = 0;

        /* mark the node as good. */
        status[i] = GOOD;
        check_status--;

      }/*THEN*/
      else if (crossprod[i] == 1) {

        /* find the other of the undirected arc. */
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1)
            break;

        /* safety check, just in case. */
        if (i == j) continue;

        if (((colsums[i] == 1) && (colsums[j] == 1)) ||
            ((rowsums[i] == 1) && (rowsums[j] == 1))) {

          if (*debuglevel > 0)
            Rprintf("  @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j));

          /* update the adjacency matrix and the row/column totals. */
          a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;
          crossprod[i] = 0;
          rowsums[i]--;
          colsums[i]--;
          rowsums[j]--;
          colsums[j]--;

          /* jump back to the first check; if either the row or column total
           * was equal to 1 only because of the undirected arc, the node can
           * now be marked as good. */
          if ((rowsums[i] == 0) || (colsums[i] == 0))
            goto there;

        }/*THEN*/

      }/*THEN*/

    }/*FOR*/

    /* at least three nodes are needed to have a cycle. */
    if (check_status < 3) {

      if (*debuglevel > 0)
        Rprintf("@ at least three nodes are needed to have a cycle.\n");

      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/

    /* if there are three or more bad nodes and there was no change in
     * the last iteration, the algorithm is stuck on a cycle. */
    if (check_status_old == check_status) {

      if (*debuglevel > 0)
        Rprintf("@ no change in the last iteration.\n");

      /* give up and call c_has_path() to kill some undirected arcs. */
      for (i = 0; i < nrows; i++)
        for (j = 0; j < i; j++)
          if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) {

            /* remove the arc from the adjacency matrix while testing it,
             * there's a path is always found (the arc itself). */
            a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0;

            if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) &&
               !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) {

              if (*debuglevel > 0)
                Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j));

              /* increase the iteration counter and start again. */
              z++;
              goto start;

            }/*THEN*/
            else {

              /* at least one cycle is really present; give up and return.  */
              UNPROTECT(1);
              return build_return_array(nodes, status, nrows, check_status, return_nodes);

            }/*ELSE*/

          }/*THEN*/

      /* give up if there are no undirected arcs, cycles composed
       * entirely by directed arcs are never false positives. */
      UNPROTECT(1);
      return build_return_array(nodes, status, nrows, check_status, return_nodes);

    }/*THEN*/
    else {

      check_status_old = check_status;

    }/*ELSE*/

  }/*FOR*/

  UNPROTECT(1);
  return build_return_array(nodes, status, nrows, check_status, return_nodes);

}/*IS_PDAG_ACYCLIC*/
예제 #4
0
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */
SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference,
    SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) {

int nnodes = length(nodes), i = 0, j = 0;
int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug);
int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL;
double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL;
double *mp = REAL(maxp), *np = REAL(nparents);
SEXP bestop;

  /* allocate and initialize the return value (use FALSE as a canary value). */
  PROTECT(bestop = allocVector(VECSXP, 3));
  setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to"));

  /* allocate and initialize a dummy FALSE object. */
  SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE));

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(nnodes, sizeof(int));
  scratch = Calloc1D(nnodes, sizeof(int));

  /* save pointers to the numeric/integer matrices. */
  cache_value = REAL(cache);
  ad = INTEGER(added);
  am = INTEGER(amat);
  w = INTEGER(wlmat);
  b = INTEGER(blmat);

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0; i < nnodes * nnodes; i++)
       counter += ad[i];

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to add one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (ad[CMC(i, j, nnodes)] == 0)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to add %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      /* this score delta is the best one at the moment, so add the arc if it
       * does not introduce cycles in the graph. */
      if (temp - max > tol) {

        if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not adding, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ adding %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "set", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes * nnodes; i++)
       counter += am[i] * (1 - w[i]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to remove one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* whitelisted arcs are not to be removed, ever. */
      if (w[CMC(i, j, nnodes)] == 1)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)];

      if (debuglevel > 0) {

        Rprintf("  > trying to remove %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (debuglevel > 0)
          Rprintf("    @ removing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "drop", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  if (debuglevel > 0) {

     /* count how may arcs are to be tested. */
     for (i = 0, counter = 0; i < nnodes; i++)
       for (j = 0; j < nnodes; j++)
         counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]);

     Rprintf("----------------------------------------------------------------\n");
     Rprintf("* trying to reverse one of %d arcs.\n", counter);

  }/*THEN*/

  for (i = 0; i < nnodes; i++) {

    for (j = 0; j < nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, nnodes)] == 0)
        continue;

      /* don't reverse an arc if the one in the opposite direction is
       * blacklisted, ever. */
      if (b[CMC(j, i, nnodes)] == 1)
        continue;

      /* do not reverse an arc if that means violating the limit on the
       * maximum number of parents. */
      if (np[i] >= *mp)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)];
      /* nuke small values and negative zeroes. */
      if (fabs(temp) < tol) temp = 0;

      if (debuglevel > 0) {

        Rprintf("  > trying to reverse %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - max > tol) {

        if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch,
              FALSE)) {

          if (debuglevel > 0)
            Rprintf("    > not reversing, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        if (debuglevel > 0)
          Rprintf("    @ reversing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "reverse", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        from = i;
        to = j;
        /* both nodes' reference scores must be updated. */
        update = 2;

        /* update the threshold score delta. */
        max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* update the reference scores. */
  REAL(reference)[to] += cache_value[CMC(from, to, nnodes)];
  if (update == 2)
    REAL(reference)[from] += cache_value[CMC(to, from, nnodes)];

  Free1D(path);
  Free1D(scratch);

  UNPROTECT(1);

  return bestop;

}/*HC_OPT_STEP*/
예제 #5
0
/* try to reverse an arc in the current network, minding the tabu list. */
void tabu_rev(double *cache_value, int *b, int *am, SEXP bestop, SEXP nodes,
    int *nnodes, int *from, int *to, double *max, int *update, SEXP tabu_list,
    int *cur, int *narcs, int *debuglevel) {

int i = 0, j = 0, idx = 0;
double temp = 0, tol = MACHINE_TOL;

  for (i = 0; i < *nnodes; i++) {

    for (j = 0; j < *nnodes; j++) {

      /* nothing to see, move along. */
      if (am[CMC(i, j, *nnodes)] == 0)
        continue;

      /* don't reverse an arc if the one in the opposite direction is
       * blacklisted, ever. */
      if (b[CMC(j, i, *nnodes)] == 1)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, *nnodes)] + cache_value[CMC(j, i, *nnodes)];

      if (*debuglevel > 0) {

        Rprintf("  > trying to reverse %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      if (temp - *max > tol) {

        if (c_has_path(i, j, am, *nnodes, nodes, FALSE, TRUE, FALSE)) {

          if (*debuglevel > 0)
            Rprintf("    > not reversing, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        /* update the adjacency matrix. */
        am[CMC(i, j, *nnodes)] = 0;
        am[CMC(j, i, *nnodes)] = 1;

        /* lookup in the tabu list. */
        idx = tabu_match(tabu_list, cur, am, narcs, nnodes, debuglevel);

        /* undo the changes in the adjacency matrix. */
        am[CMC(i, j, *nnodes)] = 1;
        am[CMC(j, i, *nnodes)] = 0;

        if (idx > 0) {

          if (*debuglevel > 0)
            Rprintf("    > not reversing, network matches element %d in the tabu list.\n", idx);

          continue;

        }/*THEN*/

        if (*debuglevel > 0)
          Rprintf("    @ reversing %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "reverse", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        *from = i;
        *to = j;
        /* both nodes' reference scores must be updated. */
        *update = 2;

        /* update the threshold score delta. */
        *max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

}/*TABU_REV*/
예제 #6
0
/* try to add an arc to the current network, minding the tabu list. */
void tabu_add(double *cache_value, int *ad, int *am, SEXP bestop, SEXP nodes,
    int *nnodes, int *from, int *to, double *max, SEXP tabu_list, int *cur,
    int *narcs, int *debuglevel) {

int i = 0, j = 0, idx = 0;
double temp = 0, tol = MACHINE_TOL;

  for (i = 0; i < *nnodes; i++) {

    for (j = 0; j < *nnodes; j++) {

      /* nothing to see, move along. */
      if (ad[CMC(i, j, *nnodes)] == 0)
        continue;

      /* retrieve the score delta from the cache. */
      temp = cache_value[CMC(i, j, *nnodes)];

      if (*debuglevel > 0) {

        Rprintf("  > trying to add %s -> %s.\n", NODE(i), NODE(j));
        Rprintf("    > delta between scores for nodes %s %s is %lf.\n",
          NODE(i), NODE(j), temp);

      }/*THEN*/

      /* this score delta is the best one at the moment, so add the arc if it
       * does not introduce cycles in the graph and does not match any of the
       * networks stored in the tabu list. */
      if (temp - *max > tol) {

        if (c_has_path(j, i, am, *nnodes, nodes, FALSE, FALSE, FALSE)) {

          if (*debuglevel > 0)
            Rprintf("    > not adding, introduces cycles in the graph.\n");

          continue;

        }/*THEN*/

        /* update the adjacency matrix. */
        am[CMC(i, j, *nnodes)] = 1;
        *narcs += 1;

        /* lookup in the tabu list. */
        idx = tabu_match(tabu_list, cur, am, narcs, nnodes, debuglevel);

        /* undo the changes in the adjacency matrix. */
        am[CMC(i, j, *nnodes)] = 0;
        *narcs -= 1;

        if (idx > 0) {

          if (*debuglevel > 0)
            Rprintf("    > not adding, network matches element %d in the tabu list.\n", idx);

          continue;

        }/*THEN*/

        if (*debuglevel > 0)
          Rprintf("    @ adding %s -> %s.\n", NODE(i), NODE(j));

        /* update the return value. */
        bestop_update(bestop, "set", NODE(i), NODE(j));
        /* store the node indices to update the reference scores. */
        *from = i;
        *to = j;

        /* update the threshold score delta. */
        *max = temp;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

}/*TABU_ADD*/