/* return the complete orientation of a graph (the nodes argument gives * the node ordering). */ SEXP pdag2dag(SEXP arcs, SEXP nodes) { int i = 0, j = 0, n = length(nodes); int *a = NULL; SEXP amat, res; /* build the adjacency matrix. */ PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* scan the adjacency matrix. */ for (i = 0; i < n; i++) { for (j = i + 1; j < n; j++) { /* if an arc is undirected, kill the orientation that violates the * specified node ordering (the one which is located in the lower * half of the matrix). */ if ((a[CMC(i, j, n)] == 1) && (a[CMC(j, i, n)] == 1)) a[CMC(j, i, n)] = 0; }/*FOR*/ }/*FOR*/ /* build the return value. */ PROTECT(res = amat2arcs(amat, nodes)); UNPROTECT(2); return res; }/*PDAG2DAG*/
/* adjusted arc counting for boot.strength(). */ SEXP bootstrap_strength_counters(SEXP prob, SEXP weight, SEXP arcs, SEXP nodes) { int i = 0, j = 0, n = length(nodes), *a = NULL; double *p = NULL, *w = NULL; SEXP amat; /* build the adjacency matrix for the current network. */ PROTECT(amat = arcs2amat(arcs, nodes)); /* map the contents of the SEXPs for easy access. */ a = INTEGER(amat); p = REAL(prob); w = REAL(weight); for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { /* increase the counter of 1/2 for an undirected arc (the other half * is added to the symmetric element in the matrix) or of 1 for a * direcxted arc. */ if (a[CMC(i, j, n)] == 1) { if (a[CMC(j, i, n)] == 1) p[CMC(i, j, n)] += 0.5 * (*w); else p[CMC(i, j, n)] += 1 * (*w); }/*THEN*/ }/*FOR*/ }/*FOR*/ UNPROTECT(1); return prob; }/*BOOTSTRAP_STRENGTH*/
/* construct a consistent DAG extension of a CPDAG. */ SEXP pdag_extension(SEXP arcs, SEXP nodes, SEXP debug) { int i = 0, j = 0, k = 0, t = 0, nnodes = length(nodes); int changed = 0, left = nnodes; int *a = NULL, *nbr = NULL, debuglevel = isTRUE(debug); short int *matched = NULL; SEXP amat, result; /* build and dereference the adjacency matrix. */ PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* allocate and initialize the neighbours and matched vectors. */ nbr = Calloc1D(nnodes, sizeof(int)); matched = Calloc1D(nnodes, sizeof(short int)); for (t = 0; t < nnodes; t++) { if (debuglevel > 0) { Rprintf("----------------------------------------------------------------\n"); Rprintf("> performing pass %d.\n", t + 1); Rprintf("> candidate nodes: "); for (j = 0; j < nnodes; j++) if (matched[j] == 0) Rprintf("%s ", NODE(j)); Rprintf("\n"); }/*THEN*/ for (i = 0; i < nnodes; i++) { /* if the node is already ok, skip it. */ if (matched[i] != 0) continue; /* check whether the node is a sink (that is, whether is does not have * any child). */ is_a_sink(a, i, &k, nnodes, nbr, matched); /* if the node is not a sink move on. */ if (k == -1) { if (debuglevel > 0) Rprintf(" * node %s is not a sink.\n", NODE(i)); continue; }/*THEN*/ else { if (debuglevel > 0) Rprintf(" * node %s is a sink.\n", NODE(i)); }/*ELSE*/ if (!all_adjacent(a, i, k, nnodes, nbr)) { if (debuglevel > 0) Rprintf(" * not all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i)); continue; }/*THEN*/ else { if (debuglevel > 0) { if (k == 0) Rprintf(" * no node is linked to %s by an undirected arc.\n", NODE(i)); else Rprintf(" * all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i)); }/*THEN*/ }/*ELSE*/ /* the current node meets all the conditions, direct all the arcs towards it. */ if (k == 0) { if (debuglevel > 0) Rprintf(" @ no undirected arc to direct towards %s.\n", NODE(i)); }/*THEN*/ else { for (j = 0; j < k; j++) a[CMC(i, nbr[j], nnodes)] = 0; if (debuglevel > 0) Rprintf(" @ directing all incident undirected arcs towards %s.\n", NODE(i)); }/*ELSE*/ /* set the changed flag. */ changed = 1; /* exclude the node from later iterations. */ matched[i] = 1; left--; }/*FOR*/ /* if nothing changed in the last iteration or there are no more candidate * nodes, there is nothing else to do. */ if ((changed == 0) || (left == 0)) break; else changed = 0; }/*FOR*/ /* build the new arc set from the adjacency matrix. */ PROTECT(result = amat2arcs(amat, nodes)); Free1D(nbr); Free1D(matched); UNPROTECT(2); return result; }/*PDAG_EXTENSION*/
SEXP is_pdag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, SEXP directed, SEXP debug) { int i = 0, j = 0, z = 0; int nrows = LENGTH(nodes); int check_status = nrows, check_status_old = nrows; int *rowsums = NULL, *colsums = NULL, *crossprod = NULL, *a = NULL; int *debuglevel = NULL; short int *status = NULL; SEXP amat; /* dereference the debug parameter. */ debuglevel = LOGICAL(debug); /* build the adjacency matrix from the arc set. */ if (*debuglevel > 0) Rprintf("* building the adjacency matrix.\n"); PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* should we consider only directed arcs? */ if (isTRUE(directed)) { /* removing undirected arcs, so that only cycles made only of directed * arcs will make the function return TRUE. */ for (i = 0; i < nrows; i++) for (j = 0; j < nrows; j++) if ((a[CMC(i, j, nrows)] == 1) && (a[CMC(j, i, nrows)] == 1)) a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; }/*THEN*/ /* initialize the status, {row,col}sums and crossprod arrays. */ status = allocstatus(nrows); rowsums = alloc1dcont(nrows); colsums = alloc1dcont(nrows); crossprod = alloc1dcont(nrows); if (*debuglevel > 0) Rprintf("* checking whether the partially directed graph is acyclic.\n"); /* even in the worst case scenario at least two nodes are marked as * good at each iteration, so even ceil(nrows/2) iterations should be * enough. */ for (z = 0; z < nrows; z++) { start: if (*debuglevel > 0) Rprintf("* beginning iteration %d.\n", z + 1); for (i = 0; i < nrows; i++) { /* skip known-good nodes. */ if (status[i] == GOOD) continue; /* reset and update row and column totals. */ rowsums[i] = colsums[i] = crossprod[i] = 0; /* compute row and column totals for the i-th node. */ for (j = 0; j < nrows; j++) { rowsums[i] += a[CMC(i, j, nrows)]; colsums[i] += a[CMC(j, i, nrows)]; crossprod[i] += a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)]; }/*FOR*/ there: if (*debuglevel > 0) Rprintf(" > checking node %s (%d child(ren), %d parent(s), %d neighbours).\n", NODE(i), rowsums[i], colsums[i], crossprod[i]); /* if either total is zero, the node is either a root node or a * leaf node, and is not part of any cycle. */ if (((rowsums[i] == 0) || (colsums[i] == 0)) || ((crossprod[i] == 1) && (rowsums[i] == 1) && (colsums[i] == 1))) { if (*debuglevel > 0) Rprintf(" @ node %s is cannot be part of a cycle.\n", NODE(i)); /* update the adjacency matrix and the row/column totals. */ for (j = 0; j < nrows; j++) a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; rowsums[i] = colsums[i] = crossprod[i] = 0; /* mark the node as good. */ status[i] = GOOD; check_status--; }/*THEN*/ else if (crossprod[i] == 1) { /* find the other of the undirected arc. */ for (j = 0; j < i; j++) if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) break; /* safety check, just in case. */ if (i == j) continue; if (((colsums[i] == 1) && (colsums[j] == 1)) || ((rowsums[i] == 1) && (rowsums[j] == 1))) { if (*debuglevel > 0) Rprintf(" @ arc %s - %s is cannot be part of a cycle.\n", NODE(i), NODE(j)); /* update the adjacency matrix and the row/column totals. */ a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; crossprod[i] = 0; rowsums[i]--; colsums[i]--; rowsums[j]--; colsums[j]--; /* jump back to the first check; if either the row or column total * was equal to 1 only because of the undirected arc, the node can * now be marked as good. */ if ((rowsums[i] == 0) || (colsums[i] == 0)) goto there; }/*THEN*/ }/*THEN*/ }/*FOR*/ /* at least three nodes are needed to have a cycle. */ if (check_status < 3) { if (*debuglevel > 0) Rprintf("@ at least three nodes are needed to have a cycle.\n"); UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ /* if there are three or more bad nodes and there was no change in * the last iteration, the algorithm is stuck on a cycle. */ if (check_status_old == check_status) { if (*debuglevel > 0) Rprintf("@ no change in the last iteration.\n"); /* give up and call c_has_path() to kill some undirected arcs. */ for (i = 0; i < nrows; i++) for (j = 0; j < i; j++) if (a[CMC(i, j, nrows)] * a[CMC(j, i, nrows)] == 1) { /* remove the arc from the adjacency matrix while testing it, * there's a path is always found (the arc itself). */ a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; if(!c_has_path(i, j, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE) && !c_has_path(j, i, INTEGER(amat), nrows, nodes, FALSE, TRUE, FALSE)) { if (*debuglevel > 0) Rprintf("@ arc %s - %s is not part of any cycle, removing.\n", NODE(i), NODE(j)); /* increase the iteration counter and start again. */ z++; goto start; }/*THEN*/ else { /* at least one cycle is really present; give up and return. */ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*ELSE*/ }/*THEN*/ /* give up if there are no undirected arcs, cycles composed * entirely by directed arcs are never false positives. */ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ else { check_status_old = check_status; }/*ELSE*/ }/*FOR*/ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*IS_PDAG_ACYCLIC*/
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents, SEXP maxp, SEXP nodes, SEXP convert) { int i = 0, j = 0, narcs = 0, dims = length(nodes); int *a = NULL, *coords = NULL; double *mp = REAL(maxp), *np = NULL; short int referenced = 0; SEXP try, result = R_NilValue, result2; /* transform the arc set into an adjacency matrix, if it's not one already. */ if (isInteger(arcs)) { if ((referenced = MAYBE_REFERENCED(arcs))) PROTECT(result = duplicate(arcs)); }/*THEN*/ else { PROTECT(result = arcs2amat(arcs, nodes)); }/*ELSE*/ /* dereference the adjacency matrix once and for all. */ a = INTEGER(result); /* compute the number the parents of each node, unless provided. */ if (nparents == R_NilValue) { np = Calloc1D(dims, sizeof(double)); for (i = 0; i < dims; i++) for (j = 0; j < dims; j++) np[j] = a[CMC(i, j, dims)]; }/*THEN*/ else { np = REAL(nparents); }/*ELSE*/ /* flip all the nondiagonal cells. */ for (j = 0; j < dims; j++) { for (i = 0; i < dims; i++) { /* diagonal elements are always equal to zero, skip them. */ if (i == j) continue; a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)]; }/*FOR*/ }/*FOR*/ /* if an arc is present in the graph in one direction, you cannot add it in * the other direction (it would be a reversal); flip both in the adjacency * matrix. */ for (j = 0; j < dims; j++) for (i = j + 1; i < dims; i++) a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)]; /* if a node has already reached its maximum number parents, do not add * more arcs pointing to that node. */ for (j = 0; j < dims; j++) if (np[j] >= *mp) memset(a + j * dims, '\0', dims * sizeof(int)); #define FLIP_FROM_LIST(list, value) \ if (!isNull(list)) { \ if (!isInteger(list)) { \ PROTECT(try = match(nodes, list, 0)); \ coords = INTEGER(try); \ narcs = length(try)/2; \ for (i = 0; i < narcs; i++) \ a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \ UNPROTECT(1); \ }/*THEN*/ \ else { \ coords = INTEGER(list); \ for (i = 0; i < dims * dims; i ++) \ if (coords[i] == 1) \ a[i] = value; \ }/*ELSE*/ \ }/*THEN*/ /* now the blacklist gets involved. */ FLIP_FROM_LIST(blacklist, 0); /* and, last but not least, the whitelist gets involved. */ FLIP_FROM_LIST(whitelist, 1); if (nparents == R_NilValue) Free1D(np); /* return either the adjacency matrix or the arc set. */ if (isTRUE(convert)) { PROTECT(result2 = amat2arcs(result, nodes)); if (referenced || !isInteger(arcs)) UNPROTECT(2); else UNPROTECT(1); return result2; }/*THEN*/ else { if (referenced || !isInteger(arcs)) UNPROTECT(1); return result; }/*ELSE*/ }/*HC_TO_BE_ADDED*/
SEXP is_dag_acyclic(SEXP arcs, SEXP nodes, SEXP return_nodes, SEXP debug) { SEXP amat; int i = 0, j = 0, z = 0; int nrows = LENGTH(nodes); int check_status = nrows; int check_status_old = nrows; int rowsums, colsums; int *a = NULL, *debuglevel = NULL; short int *status = NULL; /* dereference the debug parameter. */ debuglevel = LOGICAL(debug); /* build the adjacency matrix from the arc set. */ if (*debuglevel > 0) Rprintf("* building the adjacency matrix.\n"); PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* allocate and initialize the status array. */ status = allocstatus(nrows); if (*debuglevel > 0) Rprintf("* checking whether the directed graph is acyclic.\n"); /* even in the worst case scenario at least two nodes are marked as * good at each iteration, so even ceil(nrows/2) iterations should be * enough. */ for (z = 0; z < nrows; z++) { if (*debuglevel > 0) Rprintf("* beginning iteration %d.\n", z + 1); for (i = 0; i < nrows; i++) { /* skip known-good nodes. */ if (status[i] == GOOD) continue; /* reset and update row and column totals. */ rowsums = colsums = 0; /* compute row and column totals for the i-th node. */ for (j = 0; j < nrows; j++) { rowsums += a[CMC(i, j, nrows)]; colsums += a[CMC(j, i, nrows)]; }/*FOR*/ if (*debuglevel > 0) Rprintf(" > checking node %s (%d child(ren), %d parent(s)).\n", NODE(i), rowsums, colsums); /* if either total is zero, the node is either a root node or a * leaf node, and is not part of any cycle. */ if ((rowsums == 0) || (colsums == 0)) { if (*debuglevel > 0) Rprintf(" @ node %s is cannot be part of a cycle.\n", NODE(i)); for (j = 0; j < nrows; j++) a[CMC(i, j, nrows)] = a[CMC(j, i, nrows)] = 0; /* mark the node as good. */ status[i] = GOOD; check_status--; }/*THEN*/ }/*FOR*/ /* at least three nodes are needed to have a cycle. */ if (check_status < 3) { if (*debuglevel > 0) Rprintf("@ at least three nodes are needed to have a cycle.\n"); UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ /* if there are three or more bad nodes and there was no change in * the last iteration, the algorithm is stuck on a cycle. */ if (check_status_old == check_status) { if (*debuglevel > 0) Rprintf("@ no change in the last iteration.\n"); UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*THEN*/ else check_status_old = check_status; }/*FOR*/ UNPROTECT(1); return build_return_array(nodes, status, nrows, check_status, return_nodes); }/*IS_DAG_ACYCLIC*/