SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) { int k = 0, from = 0, to = 0, nrows = length(arcs) / 2, dims = length(nodes); int *a = NULL, *coords = NULL, *poset = NULL, *path = NULL, *scratch = NULL; double *w = NULL; SEXP weights2, amat, try, acyclic; /* allocate and initialize the adjacency matrix. */ PROTECT(amat = allocMatrix(INTSXP, dims, dims)); a = INTEGER(amat); memset(a, '\0', sizeof(int) * dims * dims); /* match the node labels in the arc set. */ PROTECT(try = match(nodes, arcs, 0)); coords = INTEGER(try); /* duplicate the weights to preserve the original ones. */ PROTECT(weights2 = duplicate(weights)); w = REAL(weights2); /* sort the strength coefficients. */ poset = Calloc1D(nrows, sizeof(int)); for (k = 0; k < nrows; k++) poset[k] = k; R_qsort_I(w, poset, 1, nrows); /* allocate buffers for c_has_path(). */ path = Calloc1D(dims, sizeof(int)); scratch = Calloc1D(dims, sizeof(int)); /* iterate over the arcs in reverse order wrt their strength coefficients. */ for (k = 0; k < nrows; k++) { from = coords[poset[k]] - 1; to = coords[poset[k] + nrows] - 1; /* add an arc only if it does not introduce cycles. */ if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, path, scratch, FALSE)) a[CMC(from, to, dims)] = 1; else warning("arc %s -> %s would introduce cycles in the graph, ignoring.", NODE(from), NODE(to)); }/*FOR*/ /* convert the adjacency matrix back to an arc set and return it. */ acyclic = amat2arcs(amat, nodes); Free1D(path); Free1D(scratch); Free1D(poset); UNPROTECT(3); return acyclic; }/*SMART_NETWORK_AVERAGING*/
SEXP has_pdag_path(SEXP from, SEXP to, SEXP amat, SEXP nrows, SEXP nodes, SEXP underlying, SEXP exclude_direct, SEXP debug) { int start = INT(from) - 1, stop = INT(to) - 1, n = INT(nrows); int debuglevel = LOGICAL(debug)[0], notdirect = LOGICAL(exclude_direct)[0], ugraph = LOGICAL(underlying)[0], *a = INTEGER(amat); return ScalarLogical(c_has_path(start, stop, a, n, nodes, ugraph, notdirect, debuglevel)); }/*HAS_DAG_PATH*/
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, SEXP directed, SEXP debug) { int i = 0, j = 0, z = 0; int nrows = LENGTH(nodes); int check_status = nrows, check_status_old = nrows; int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL; int *debuglevel = NULL; short int *status = NULL; SEXP amat; /* dereference the debug parameter. */ debuglevel = LOGICAL(debug); /* build the adjacency matrix from the arc set. */ if (*debuglevel > 0) Rprintf("* building the adjacency matrix.\n"); PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* should we consider only directed arcs? */ if (isTRUE(directed)) { /* removing undirected arcs, so that only cycles made only of directed * arcs will make the function return TRUE. */ for (i = 0; i < nrows; i++) for (j = 0; j < nrows; j++) if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1)) a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; }/*THEN*/ /* initialize the status, {row,col}sums and crossprod arrays. */ status = allocstatus(nrows); rowsums = alloc1dcont(nrows); colsums = alloc1dcont(nrows); crossprod = alloc1dcont(nrows); if (*debuglevel > 0) Rprintf("* checking whether the partially directed graph is acyclic.\n"); /* even in the worst case scenario at least two nodes are marked as * good at each iteration, so even ceil(nrows/2) iterations should be * enough. */ for (z = 0; z < nrows; z++) { start: if (*debuglevel > 0) Rprintf("* beginning iteration %d.\n", z + 1); for (i = 0; i < nrows; i++) { /* skip known-good nodes. */ if (status[i] == GOOD) continue; /* reset and update row and column totals. */ rowsums[i] = colsums[i] = crossprod[i] = 0; /* compute row and column totals for the i-th node. */ for (j = 0; j < nrows; j++) { rowsums[i] += a[CMC(i, j, nrows)]; colsums[i] += a[CMC(j, i, nrows)]; crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)]; }/*FOR*/ there: if (*debuglevel > 0) Rprintf(" > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n", NODE(i), rowsums[i], colsums[i], crossprod[i]); /* if either total is zero, the node is either a root node or a * leaf node, and is not part of any cycle. */ if (((rowsums[i] == 0) || (colsums[i] == 0)) || ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) { if (*debuglevel > 0) Rprintf(" @ node %s is cannot be part of a cycle.\n", NODE(i)); /* update the adjacency matrix and the row/column totals. */ for (j = 0; j < nrows; j++) a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; rowsums[i] = colsums[i] = crossprod[i] = 0; /* mark the node as good. */ status[i] = GOOD; check_status--; }/*THEN*/ else if (crossprod[i] == 1) { /* find the other of the undirected arc. */ for (j = 0; j < i; j++) if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) break; /* safety check, just in case. */ if (i == j) continue; if (((colsums[i] == 1) && (colsums[j] == 1)) || ((rowsums[i] == 1) && (rowsums[j] == 1))) { if (*debuglevel > 0) Rprintf(" @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j)); /* update the adjacency matrix and the row/column totals. */ a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; crossprod[i] = 0; rowsums[i]--; colsums[i]--; rowsums[j]--; colsums[j]--; /* jump back to the first check; if either the row or column total * was equal to 1 only because of the undirected arc, the node can * now be marked as good. */ if ((rowsums[i] == 0) || (colsums[i] == 0)) goto there; }/*THEN*/ }/*THEN*/ }/*FOR*/ /* at least three nodes are needed to have a cycle. */ if (check_status < 3) { if (*debuglevel > 0) Rprintf("@ at least three nodes are needed to have a cycle.\n"); UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ /* if there are three or more bad nodes and there was no change in * the last iteration, the algorithm is stuck on a cycle. */ if (check_status_old == check_status) { if (*debuglevel > 0) Rprintf("@ no change in the last iteration.\n"); /* give up and call c_has_path() to kill some undirected arcs. */ for (i = 0; i < nrows; i++) for (j = 0; j < i; j++) if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) { /* remove the arc from the adjacency matrix while testing it, * there's a path is always found (the arc itself). */ a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) && !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) { if (*debuglevel > 0) Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j)); /* increase the iteration counter and start again. */ z++; goto start; }/*THEN*/ else { /* at least one cycle is really present; give up and return. */ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*ELSE*/ }/*THEN*/ /* give up if there are no undirected arcs, cycles composed * entirely by directed arcs are never false positives. */ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ else { check_status_old = check_status; }/*ELSE*/ }/*FOR*/ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*IS_PDAG_ACYCLIC*/
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */ SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference, SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) { int nnodes = length(nodes), i = 0, j = 0; int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug); int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL; double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL; double *mp = REAL(maxp), *np = REAL(nparents); SEXP bestop; /* allocate and initialize the return value (use FALSE as a canary value). */ PROTECT(bestop = allocVector(VECSXP, 3)); setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to")); /* allocate and initialize a dummy FALSE object. */ SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE)); /* allocate buffers for c_has_path(). */ path = Calloc1D(nnodes, sizeof(int)); scratch = Calloc1D(nnodes, sizeof(int)); /* save pointers to the numeric/integer matrices. */ cache_value = REAL(cache); ad = INTEGER(added); am = INTEGER(amat); w = INTEGER(wlmat); b = INTEGER(blmat); if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0; i < nnodes * nnodes; i++) counter += ad[i]; Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to add one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (ad[CMC(i, j, nnodes)] == 0) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to add %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ /* this score delta is the best one at the moment, so add the arc if it * does not introduce cycles in the graph. */ if (temp - max > tol) { if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not adding, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ adding %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "set", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes * nnodes; i++) counter += am[i] * (1 - w[i]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to remove one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* whitelisted arcs are not to be removed, ever. */ if (w[CMC(i, j, nnodes)] == 1) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to remove %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (debuglevel > 0) Rprintf(" @ removing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "drop", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to reverse one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* don't reverse an arc if the one in the opposite direction is * blacklisted, ever. */ if (b[CMC(j, i, nnodes)] == 1) continue; /* do not reverse an arc if that means violating the limit on the * maximum number of parents. */ if (np[i] >= *mp) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)]; /* nuke small values and negative zeroes. */ if (fabs(temp) < tol) temp = 0; if (debuglevel > 0) { Rprintf(" > trying to reverse %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not reversing, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ reversing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "reverse", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* both nodes' reference scores must be updated. */ update = 2; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ /* update the reference scores. */ REAL(reference)[to] += cache_value[CMC(from, to, nnodes)]; if (update == 2) REAL(reference)[from] += cache_value[CMC(to, from, nnodes)]; Free1D(path); Free1D(scratch); UNPROTECT(1); return bestop; }/*HC_OPT_STEP*/
/* try to reverse an arc in the current network, minding the tabu list. */ void tabu_rev(double *cache_value, int *b, int *am, SEXP bestop, SEXP nodes, int *nnodes, int *from, int *to, double *max, int *update, SEXP tabu_list, int *cur, int *narcs, int *debuglevel) { int i = 0, j = 0, idx = 0; double temp = 0, tol = MACHINE_TOL; for (i = 0; i < *nnodes; i++) { for (j = 0; j < *nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, *nnodes)] == 0) continue; /* don't reverse an arc if the one in the opposite direction is * blacklisted, ever. */ if (b[CMC(j, i, *nnodes)] == 1) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, *nnodes)] + cache_value[CMC(j, i, *nnodes)]; if (*debuglevel > 0) { Rprintf(" > trying to reverse %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - *max > tol) { if (c_has_path(i, j, am, *nnodes, nodes, FALSE, TRUE, FALSE)) { if (*debuglevel > 0) Rprintf(" > not reversing, introduces cycles in the graph.\n"); continue; }/*THEN*/ /* update the adjacency matrix. */ am[CMC(i, j, *nnodes)] = 0; am[CMC(j, i, *nnodes)] = 1; /* lookup in the tabu list. */ idx = tabu_match(tabu_list, cur, am, narcs, nnodes, debuglevel); /* undo the changes in the adjacency matrix. */ am[CMC(i, j, *nnodes)] = 1; am[CMC(j, i, *nnodes)] = 0; if (idx > 0) { if (*debuglevel > 0) Rprintf(" > not reversing, network matches element %d in the tabu list.\n", idx); continue; }/*THEN*/ if (*debuglevel > 0) Rprintf(" @ reversing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "reverse", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ *from = i; *to = j; /* both nodes' reference scores must be updated. */ *update = 2; /* update the threshold score delta. */ *max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ }/*TABU_REV*/
/* try to add an arc to the current network, minding the tabu list. */ void tabu_add(double *cache_value, int *ad, int *am, SEXP bestop, SEXP nodes, int *nnodes, int *from, int *to, double *max, SEXP tabu_list, int *cur, int *narcs, int *debuglevel) { int i = 0, j = 0, idx = 0; double temp = 0, tol = MACHINE_TOL; for (i = 0; i < *nnodes; i++) { for (j = 0; j < *nnodes; j++) { /* nothing to see, move along. */ if (ad[CMC(i, j, *nnodes)] == 0) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, *nnodes)]; if (*debuglevel > 0) { Rprintf(" > trying to add %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ /* this score delta is the best one at the moment, so add the arc if it * does not introduce cycles in the graph and does not match any of the * networks stored in the tabu list. */ if (temp - *max > tol) { if (c_has_path(j, i, am, *nnodes, nodes, FALSE, FALSE, FALSE)) { if (*debuglevel > 0) Rprintf(" > not adding, introduces cycles in the graph.\n"); continue; }/*THEN*/ /* update the adjacency matrix. */ am[CMC(i, j, *nnodes)] = 1; *narcs += 1; /* lookup in the tabu list. */ idx = tabu_match(tabu_list, cur, am, narcs, nnodes, debuglevel); /* undo the changes in the adjacency matrix. */ am[CMC(i, j, *nnodes)] = 0; *narcs -= 1; if (idx > 0) { if (*debuglevel > 0) Rprintf(" > not adding, network matches element %d in the tabu list.\n", idx); continue; }/*THEN*/ if (*debuglevel > 0) Rprintf(" @ adding %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "set", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ *from = i; *to = j; /* update the threshold score delta. */ *max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ }/*TABU_ADD*/