/* return the complete orientation of a graph (the nodes argument gives * the node ordering). */ SEXP pdag2dag(SEXP arcs, SEXP nodes) { int i = 0, j = 0, n = length(nodes); int *a = NULL; SEXP amat, res; /* build the adjacency matrix. */ PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* scan the adjacency matrix. */ for (i = 0; i < n; i++) { for (j = i + 1; j < n; j++) { /* if an arc is undirected, kill the orientation that violates the * specified node ordering (the one which is located in the lower * half of the matrix). */ if ((a[CMC(i, j, n)] == 1) && (a[CMC(j, i, n)] == 1)) a[CMC(j, i, n)] = 0; }/*FOR*/ }/*FOR*/ /* build the return value. */ PROTECT(res = amat2arcs(amat, nodes)); UNPROTECT(2); return res; }/*PDAG2DAG*/
SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) { int k = 0, from = 0, to = 0, nrows = length(arcs) / 2, dims = length(nodes); int *a = NULL, *coords = NULL, *poset = NULL, *path = NULL, *scratch = NULL; double *w = NULL; SEXP weights2, amat, try, acyclic; /* allocate and initialize the adjacency matrix. */ PROTECT(amat = allocMatrix(INTSXP, dims, dims)); a = INTEGER(amat); memset(a, '\0', sizeof(int) * dims * dims); /* match the node labels in the arc set. */ PROTECT(try = match(nodes, arcs, 0)); coords = INTEGER(try); /* duplicate the weights to preserve the original ones. */ PROTECT(weights2 = duplicate(weights)); w = REAL(weights2); /* sort the strength coefficients. */ poset = Calloc1D(nrows, sizeof(int)); for (k = 0; k < nrows; k++) poset[k] = k; R_qsort_I(w, poset, 1, nrows); /* allocate buffers for c_has_path(). */ path = Calloc1D(dims, sizeof(int)); scratch = Calloc1D(dims, sizeof(int)); /* iterate over the arcs in reverse order wrt their strength coefficients. */ for (k = 0; k < nrows; k++) { from = coords[poset[k]] - 1; to = coords[poset[k] + nrows] - 1; /* add an arc only if it does not introduce cycles. */ if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, path, scratch, FALSE)) a[CMC(from, to, dims)] = 1; else warning("arc %s -> %s would introduce cycles in the graph, ignoring.", NODE(from), NODE(to)); }/*FOR*/ /* convert the adjacency matrix back to an arc set and return it. */ acyclic = amat2arcs(amat, nodes); Free1D(path); Free1D(scratch); Free1D(poset); UNPROTECT(3); return acyclic; }/*SMART_NETWORK_AVERAGING*/
/* generate a graph with given node ordering and arc probability. */ SEXP ordered_graph(SEXP nodes, SEXP num, SEXP prob) { int i = 0, j = 0, k = 0, nnodes = LENGTH(nodes), *a = NULL, *n = INTEGER(num); double *p = REAL(prob); SEXP list, res, args, argnames, amat, arcs, cached, debug2, null, temp; /* a fake debug argument (set to FALSE) for cache_structure(). */ PROTECT(debug2 = allocVector(LGLSXP, 1)); LOGICAL(debug2)[0] = FALSE; /* the list of optional arguments. */ PROTECT(argnames = allocVector(STRSXP, 1)); SET_STRING_ELT(argnames, 0, mkChar("prob")); PROTECT(args = allocVector(VECSXP, 1)); setAttrib(args, R_NamesSymbol, argnames); SET_VECTOR_ELT(args, 0, prob); /* allocate and initialize the adjacency matrix. */ PROTECT(amat = allocMatrix(INTSXP, nnodes, nnodes)); a = INTEGER(amat); memset(a, '\0', nnodes * nnodes * sizeof(int)); GetRNGstate(); #define ORDERED_AMAT(prob) \ for (i = 0; i < nnodes; i++) \ for (j = i + 1; j < nnodes; j++) \ if (unif_rand() < prob) \ a[CMC(i, j, nnodes)] = 1; \ else \ a[CMC(i, j, nnodes)] = 0; \ /* return a list if more than one bn is generated. */ if (*n > 1) { PROTECT(list = allocVector(VECSXP, *n)); PROTECT(null = allocVector(NILSXP, 1)); /* generate the "bn" structure, with dummy NULLs for the "arcs" and * "nodes" elements (which will be initialized later on). */ PROTECT(res = bn_base_structure(nodes, args, null, null, 0, "none", "ordered")); for (k = 0; k < *n; k++) { /* sample each arc in the upper-triangular portion of the adjacency matrix * (so that node ordering is conserved) with the specified probability. */ ORDERED_AMAT(*p); /* generate the arc set and the cached information form the adjacency * matrix. */ PROTECT(arcs = amat2arcs(amat, nodes)); PROTECT(cached = cache_structure(nodes, amat, debug2)); SET_VECTOR_ELT(res, 1, cached); SET_VECTOR_ELT(res, 2, arcs); /* save the structure in the list. */ PROTECT(temp = duplicate(res)); SET_VECTOR_ELT(list, k, temp); UNPROTECT(3); }/*FOR*/ PutRNGstate(); UNPROTECT(7); return list; }/*THEN*/ else { /* sample each arc in the upper-triangular portion of the adjacency matrix * (so that node ordering is conserved) with the specified probability. */ ORDERED_AMAT(*p); /* generate the arc set and the cached information form the adjacency * matrix. */ PROTECT(arcs = amat2arcs(amat, nodes)); PROTECT(cached = cache_structure(nodes, amat, debug2)); /* generate the "bn" structure. */ PROTECT(res = bn_base_structure(nodes, args, arcs, cached, 0, "none", "ordered")); PutRNGstate(); UNPROTECT(7); return res; }/*ELSE*/ }/*ORDERED_GRAPH*/
/* generate a connected graph with uniform probability, subject to some * constraints on the degree of the nodes. */ SEXP ide_cozman_graph(SEXP nodes, SEXP num, SEXP burn_in, SEXP max_in_degree, SEXP max_out_degree, SEXP max_degree, SEXP connected, SEXP debug) { int i = 0, k = 0, nnodes = LENGTH(nodes), *n = INTEGER(num); int changed = 0, *work = NULL, *arc = NULL, *a = NULL, *burn = INTEGER(burn_in); int *degree = NULL, *in_degree = NULL, *out_degree = NULL; int *debuglevel = LOGICAL(debug), *cozman = LOGICAL(connected); double *max_in = REAL(max_in_degree), *max_out = REAL(max_out_degree), *max = REAL(max_degree); SEXP list, res, args, argnames, amat, arcs, cached, debug2, null, temp; char *label = (*cozman > 0) ? "ic-dag" : "melancon"; /* a fake debug argument (set to FALSE) for cache_structure(). */ PROTECT(debug2 = allocVector(LGLSXP, 1)); LOGICAL(debug2)[0] = FALSE; /* the list of optional arguments. */ PROTECT(argnames = allocVector(STRSXP, 4)); SET_STRING_ELT(argnames, 0, mkChar("burn.in")); SET_STRING_ELT(argnames, 1, mkChar("max.in.degree")); SET_STRING_ELT(argnames, 2, mkChar("max.out.degree")); SET_STRING_ELT(argnames, 3, mkChar("max.degree")); PROTECT(args = allocVector(VECSXP, 4)); setAttrib(args, R_NamesSymbol, argnames); SET_VECTOR_ELT(args, 0, burn_in); SET_VECTOR_ELT(args, 1, max_in_degree); SET_VECTOR_ELT(args, 2, max_out_degree); SET_VECTOR_ELT(args, 3, max_degree); /* allocate and initialize the adjacency matrix. */ PROTECT(amat = allocMatrix(INTSXP, nnodes, nnodes)); a = INTEGER(amat); memset(a, '\0', nnodes * nnodes * sizeof(int)); /* initialize a simple ordered tree with n nodes, where all nodes * have just one parent, except the first one that does not have * any parent. */ for (i = 1; i < nnodes; i++) a[CMC(i - 1, i, nnodes)] = 1; /* allocate the arrays needed by SampleNoReplace. */ arc = alloc1dcont(2); work = alloc1dcont(nnodes); /* allocate and initialize the degree arrays. */ degree = alloc1dcont(nnodes); in_degree = alloc1dcont(nnodes); out_degree = alloc1dcont(nnodes); for (i = 0; i < nnodes; i++) { in_degree[i] = out_degree[i] = 1; degree[i] = 2; }/*FOR*/ in_degree[0] = out_degree[nnodes - 1] = 0; degree[0] = degree[nnodes - 1] = 1; GetRNGstate(); /* wait for the markov chain monte carlo simulation to reach stationarity. */ for (k = 0; k < *burn; k++) { if (*debuglevel > 0) Rprintf("* current model (%d):\n", k + 1); changed = ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree, max_in, out_degree, max_out, cozman, debuglevel); /* print the model string to allow a sane debugging experience; note that this * has a huge impact on performance, so use it with care. */ if ((*debuglevel > 0) && (changed)) { PROTECT(null = allocVector(NILSXP, 1)); PROTECT(res = bn_base_structure(nodes, args, null, null, 0, "none", label)); PROTECT(arcs = amat2arcs(amat, nodes)); PROTECT(cached = cache_structure(nodes, amat, debug2)); SET_VECTOR_ELT(res, 1, cached); SET_VECTOR_ELT(res, 2, arcs); print_modelstring(res); UNPROTECT(4); }/*THEN*/ }/*FOR*/ #define UPDATE_NODE_CACHE(cur) \ if (*debuglevel > 0) \ Rprintf(" > updating cached information about node %s.\n", NODE(cur)); \ memset(work, '\0', nnodes * sizeof(int)); \ PROTECT(temp = c_cache_partial_structure(cur, nodes, amat, work, debug2)); \ SET_VECTOR_ELT(cached, cur, temp); \ UNPROTECT(1); /* return a list if more than one bn is generated. */ if (*n > 1) { if (*debuglevel > 0) Rprintf("* end of the burn-in iterations.\n"); PROTECT(list = allocVector(VECSXP, *n)); PROTECT(null = allocVector(NILSXP, 1)); /* generate the "bn" structure, with dummy NULLs for the "arcs" and * "nodes" elements (which will be initialized later on). */ PROTECT(res = bn_base_structure(nodes, args, null, null, 0, "none", label)); for (k = 0; k < *n; k++) { if (*debuglevel > 0) Rprintf("* current model (%d):\n", *burn + k + 1); changed = ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree, max_in, out_degree, max_out, cozman, debuglevel); if (changed || (k == 0)) { /* generate the arc set and the cached information from the adjacency * matrix. */ if (k > 0) { /* if a complete "bn" object is available, we can retrieve the cached * information about the nodes from the structure stored in the last * iteration and update only the elements that really need it. */ temp = VECTOR_ELT(VECTOR_ELT(list, k - 1), 1); PROTECT(cached = duplicate(temp)); /* update the first sampled nodes; both of them gain/lose either * a parent or a child. */ UPDATE_NODE_CACHE(arc[0] - 1); UPDATE_NODE_CACHE(arc[1] - 1); /* all the parents of the second sampled node gain/lose a node in * the markov blanket (the first sampled node, which shares a child * with all of them). */ for (i = 0; i < nnodes; i++) { if ((i != arc[0] - 1) && (a[CMC(i, arc[1] - 1, nnodes)] == 1)) { UPDATE_NODE_CACHE(i); }/*THEN*/ }/*FOR*/ }/*THEN*/ else { PROTECT(cached = cache_structure(nodes, amat, debug2)); }/*ELSE*/ PROTECT(arcs = amat2arcs(amat, nodes)); SET_VECTOR_ELT(res, 1, cached); SET_VECTOR_ELT(res, 2, arcs); /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(res); /* save the structure in the list. */ PROTECT(temp = duplicate(res)); SET_VECTOR_ELT(list, k, temp); UNPROTECT(3); }/*THEN*/ else { /* the adjacency matrix is unchanged; so we can just copy the bayesian * network from the previous iteration in the k-th slot of the list. */ SET_VECTOR_ELT(list, k, VECTOR_ELT(list, k - 1)); }/*ELSE*/ }/*FOR*/ PutRNGstate(); UNPROTECT(7); return list; }/*THEN*/ else { if (*debuglevel > 0) Rprintf("* end of the burn-in.\n* current model (%d):\n", *burn + 1); ic_logic(a, nodes, &nnodes, arc, work, degree, max, in_degree, max_in, out_degree, max_out, cozman, debuglevel); /* generate the arc set and the cached information form the adjacency * matrix. */ PROTECT(arcs = amat2arcs(amat, nodes)); PROTECT(cached = cache_structure(nodes, amat, debug2)); /* generate the "bn" structure. */ PROTECT(res = bn_base_structure(nodes, args, arcs, cached, 0, "none", label)); /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(res); PutRNGstate(); UNPROTECT(7); return res; }/*ELSE*/ }/*IDE_COZMAN_GRAPH*/
/* construct a consistent DAG extension of a CPDAG. */ SEXP pdag_extension(SEXP arcs, SEXP nodes, SEXP debug) { int i = 0, j = 0, k = 0, t = 0, nnodes = length(nodes); int changed = 0, left = nnodes; int *a = NULL, *nbr = NULL, debuglevel = isTRUE(debug); short int *matched = NULL; SEXP amat, result; /* build and dereference the adjacency matrix. */ PROTECT(amat = arcs2amat(arcs, nodes)); a = INTEGER(amat); /* allocate and initialize the neighbours and matched vectors. */ nbr = Calloc1D(nnodes, sizeof(int)); matched = Calloc1D(nnodes, sizeof(short int)); for (t = 0; t < nnodes; t++) { if (debuglevel > 0) { Rprintf("----------------------------------------------------------------\n"); Rprintf("> performing pass %d.\n", t + 1); Rprintf("> candidate nodes: "); for (j = 0; j < nnodes; j++) if (matched[j] == 0) Rprintf("%s ", NODE(j)); Rprintf("\n"); }/*THEN*/ for (i = 0; i < nnodes; i++) { /* if the node is already ok, skip it. */ if (matched[i] != 0) continue; /* check whether the node is a sink (that is, whether is does not have * any child). */ is_a_sink(a, i, &k, nnodes, nbr, matched); /* if the node is not a sink move on. */ if (k == -1) { if (debuglevel > 0) Rprintf(" * node %s is not a sink.\n", NODE(i)); continue; }/*THEN*/ else { if (debuglevel > 0) Rprintf(" * node %s is a sink.\n", NODE(i)); }/*ELSE*/ if (!all_adjacent(a, i, k, nnodes, nbr)) { if (debuglevel > 0) Rprintf(" * not all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i)); continue; }/*THEN*/ else { if (debuglevel > 0) { if (k == 0) Rprintf(" * no node is linked to %s by an undirected arc.\n", NODE(i)); else Rprintf(" * all nodes linked to %s by an undirected arc are adjacent.\n", NODE(i)); }/*THEN*/ }/*ELSE*/ /* the current node meets all the conditions, direct all the arcs towards it. */ if (k == 0) { if (debuglevel > 0) Rprintf(" @ no undirected arc to direct towards %s.\n", NODE(i)); }/*THEN*/ else { for (j = 0; j < k; j++) a[CMC(i, nbr[j], nnodes)] = 0; if (debuglevel > 0) Rprintf(" @ directing all incident undirected arcs towards %s.\n", NODE(i)); }/*ELSE*/ /* set the changed flag. */ changed = 1; /* exclude the node from later iterations. */ matched[i] = 1; left--; }/*FOR*/ /* if nothing changed in the last iteration or there are no more candidate * nodes, there is nothing else to do. */ if ((changed == 0) || (left == 0)) break; else changed = 0; }/*FOR*/ /* build the new arc set from the adjacency matrix. */ PROTECT(result = amat2arcs(amat, nodes)); Free1D(nbr); Free1D(matched); UNPROTECT(2); return result; }/*PDAG_EXTENSION*/
/* an Ide-Cozman alternative for 2-nodes graphs. */ static SEXP ic_2nodes(SEXP nodes, SEXP num, SEXP burn_in, SEXP max_in_degree, SEXP max_out_degree, SEXP max_degree, SEXP connected, SEXP debug) { int i = 0, *n = INTEGER(num), *a = NULL; int *debuglevel = LOGICAL(debug); double u = 0; SEXP list, resA, resB, arcsA, arcsB, cachedA, cachedB; SEXP amatA, amatB, args, argnames, false; /* the list of optional arguments. */ PROTECT(argnames = allocVector(STRSXP, 4)); SET_STRING_ELT(argnames, 0, mkChar("burn.in")); SET_STRING_ELT(argnames, 1, mkChar("max.in.degree")); SET_STRING_ELT(argnames, 2, mkChar("max.out.degree")); SET_STRING_ELT(argnames, 3, mkChar("max.degree")); PROTECT(args = allocVector(VECSXP, 4)); setAttrib(args, R_NamesSymbol, argnames); SET_VECTOR_ELT(args, 0, burn_in); SET_VECTOR_ELT(args, 1, max_in_degree); SET_VECTOR_ELT(args, 2, max_out_degree); SET_VECTOR_ELT(args, 3, max_degree); /* allocate a FALSE variable. */ PROTECT(false = allocVector(LGLSXP, 1)); LOGICAL(false)[0] = FALSE; /* allocate and initialize the tow adjacency matrices. */ PROTECT(amatA = allocMatrix(INTSXP, 2, 2)); a = INTEGER(amatA); memset(a, '\0', sizeof(int) * 4); a[2] = 1; PROTECT(amatB = allocMatrix(INTSXP, 2, 2)); a = INTEGER(amatB); memset(a, '\0', sizeof(int) * 4); a[1] = 1; /* generates the arc sets. */ PROTECT(arcsA = amat2arcs(amatA, nodes)); PROTECT(arcsB = amat2arcs(amatB, nodes)); /* generate the cached node information. */ PROTECT(cachedA = cache_structure(nodes, amatA, false)); PROTECT(cachedB = cache_structure(nodes, amatB, false)); /* generate the two "bn" structures. */ PROTECT(resA = bn_base_structure(nodes, args, arcsA, cachedA, 0, "none", "empty")); PROTECT(resB = bn_base_structure(nodes, args, arcsB, cachedB, 0, "none", "empty")); if (*debuglevel > 0) Rprintf("* no burn-in required.\n"); GetRNGstate(); /* return a list if more than one bn is generated. */ if (*n > 1) { PROTECT(list = allocVector(VECSXP, *n)); for (i = 0; i < *n; i++) { if (*debuglevel > 0) Rprintf("* current model (%d):\n", i + 1); /* sample which graph to return. */ u = unif_rand(); if (u <= 0.5) { /* pick the graph with A -> B. */ SET_VECTOR_ELT(list, i, resA); /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(resA); }/*THEN*/ else { /* pick the graph with B -> A. */ SET_VECTOR_ELT(list, i, resB); /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(resB); }/*ELSE*/ }/*FOR*/ PutRNGstate(); UNPROTECT(12); return list; }/*THEN*/ else { if (*debuglevel > 0) Rprintf("* current model (1):\n"); /* sample which graph to return. */ u = unif_rand(); PutRNGstate(); UNPROTECT(11); if (u <= 0.5) { /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(resA); /* return the graph with A -> B. */ return resA; }/*THEN*/ else { /* print the model string to allow a sane debugging experience. */ if (*debuglevel > 0) print_modelstring(resB); /* return the graph with B -> A. */ return resB; }/*ELSE*/ }/*ELSE*/ }/*IC_2NODES*/
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents, SEXP maxp, SEXP nodes, SEXP convert) { int i = 0, j = 0, narcs = 0, dims = length(nodes); int *a = NULL, *coords = NULL; double *mp = REAL(maxp), *np = NULL; short int referenced = 0; SEXP try, result = R_NilValue, result2; /* transform the arc set into an adjacency matrix, if it's not one already. */ if (isInteger(arcs)) { if ((referenced = MAYBE_REFERENCED(arcs))) PROTECT(result = duplicate(arcs)); }/*THEN*/ else { PROTECT(result = arcs2amat(arcs, nodes)); }/*ELSE*/ /* dereference the adjacency matrix once and for all. */ a = INTEGER(result); /* compute the number the parents of each node, unless provided. */ if (nparents == R_NilValue) { np = Calloc1D(dims, sizeof(double)); for (i = 0; i < dims; i++) for (j = 0; j < dims; j++) np[j] = a[CMC(i, j, dims)]; }/*THEN*/ else { np = REAL(nparents); }/*ELSE*/ /* flip all the nondiagonal cells. */ for (j = 0; j < dims; j++) { for (i = 0; i < dims; i++) { /* diagonal elements are always equal to zero, skip them. */ if (i == j) continue; a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)]; }/*FOR*/ }/*FOR*/ /* if an arc is present in the graph in one direction, you cannot add it in * the other direction (it would be a reversal); flip both in the adjacency * matrix. */ for (j = 0; j < dims; j++) for (i = j + 1; i < dims; i++) a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)]; /* if a node has already reached its maximum number parents, do not add * more arcs pointing to that node. */ for (j = 0; j < dims; j++) if (np[j] >= *mp) memset(a + j * dims, '\0', dims * sizeof(int)); #define FLIP_FROM_LIST(list, value) \ if (!isNull(list)) { \ if (!isInteger(list)) { \ PROTECT(try = match(nodes, list, 0)); \ coords = INTEGER(try); \ narcs = length(try)/2; \ for (i = 0; i < narcs; i++) \ a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \ UNPROTECT(1); \ }/*THEN*/ \ else { \ coords = INTEGER(list); \ for (i = 0; i < dims * dims; i ++) \ if (coords[i] == 1) \ a[i] = value; \ }/*ELSE*/ \ }/*THEN*/ /* now the blacklist gets involved. */ FLIP_FROM_LIST(blacklist, 0); /* and, last but not least, the whitelist gets involved. */ FLIP_FROM_LIST(whitelist, 1); if (nparents == R_NilValue) Free1D(np); /* return either the adjacency matrix or the arc set. */ if (isTRUE(convert)) { PROTECT(result2 = amat2arcs(result, nodes)); if (referenced || !isInteger(arcs)) UNPROTECT(2); else UNPROTECT(1); return result2; }/*THEN*/ else { if (referenced || !isInteger(arcs)) UNPROTECT(1); return result; }/*ELSE*/ }/*HC_TO_BE_ADDED*/