SEXP count_observed_values(SEXP data) { int i = 0, j = 0, ncol = length(data), nrow = length(VECTOR_ELT(data, 0)); int *rr = NULL, *cc = NULL, *temp_integer = NULL; double *temp_real = NULL; SEXP counts, rows, cols, temp; PROTECT(counts = allocVector(VECSXP, 2)); setAttrib(counts, R_NamesSymbol, mkStringVec(2, "rows", "columns")); PROTECT(rows = allocVector(INTSXP, nrow)); PROTECT(cols = allocVector(INTSXP, ncol)); setAttrib(cols, R_NamesSymbol, getAttrib(data, R_NamesSymbol)); SET_VECTOR_ELT(counts, 0, rows); SET_VECTOR_ELT(counts, 1, cols); rr = INTEGER(rows); cc = INTEGER(cols); memset(rr, '\0', nrow * sizeof(int)); memset(cc, '\0', ncol * sizeof(int)); for (j = 0; j < ncol; j++) { temp = VECTOR_ELT(data, j); switch(TYPEOF(temp)) { case REALSXP: temp_real = REAL(temp); for (i = 0; i < nrow; i++) { rr[i] += !ISNAN(temp_real[i]); cc[j] += !ISNAN(temp_real[i]); }/*FOR*/ break; case INTSXP: temp_integer = INTEGER(temp); for (i = 0; i < nrow; i++) { rr[i] += (temp_integer[i] != NA_INTEGER); cc[j] += (temp_integer[i] != NA_INTEGER); }/*FOR*/ break; }/*SWITCH*/ }/*FOR*/ UNPROTECT(3); return counts; }/*COUNT_OBSERVED_VALUES*/
/* convert an arc set to a weighted edge list. */ SEXP arcs2welist(SEXP arcs, SEXP nodes, SEXP weights, SEXP nid, SEXP sublist, SEXP parents) { int i = 0, j = 0, k = 0, nnodes = length(nodes), narcs = length(arcs)/2; int *e = NULL, *coords = NULL, *adjacent = NULL; int num_id = isTRUE(nid), sub = isTRUE(sublist), up = isTRUE(parents); double *w = REAL(weights), *ew = NULL; SEXP try, elist, edges, edge_weights, temp, temp_name = R_NilValue; /* allocate the return value. */ PROTECT(elist = allocVector(VECSXP, nnodes)); /* set the node names. */ setAttrib(elist, R_NamesSymbol, nodes); /* allocate and initialize the subset name. */ if (sub > 0) PROTECT(temp_name = mkStringVec(2, "edges", "weight")); /* allocate the scratch space to keep track adjacent nodes. */ adjacent = alloc1dcont(nnodes); /* match the node labels in the arc set. */ PROTECT(try = match(nodes, arcs, 0)); coords = INTEGER(try); for (i = 0; i < narcs; i++) adjacent[coords[i + up * narcs] - 1]++; for (i = 0; i < nnodes; i++) { /* allocate and set up the edge array. */ if (num_id > 0) { PROTECT(edges = allocVector(INTSXP, adjacent[i])); e = INTEGER(edges); }/*THEN*/ else { PROTECT(edges = allocVector(STRSXP, adjacent[i])); }/*ELSE*/ /* allocate and set up the weights array. */ PROTECT(edge_weights = allocVector(REALSXP, adjacent[i])); ew = REAL(edge_weights); /* copy the coordinates or the labels of adjacent nodes. */ for (j = 0, k = 0; j < narcs; j++) { if (coords[j + up * narcs] != i + 1) continue; /* copy the weight as well. */ ew[k] = w[j]; if (num_id > 0) e[k++] = coords[(1 - up) * narcs + j]; else SET_STRING_ELT(edges, k++, STRING_ELT(arcs, (1 - up) * narcs + j)); if (k == adjacent[i]) break; }/*FOR*/ if (sub > 0) { /* allocate and set up the "edge" sublist for graphNEL. */ PROTECT(temp = allocVector(VECSXP, 2)); setAttrib(temp, R_NamesSymbol, temp_name); SET_VECTOR_ELT(temp, 0, edges); SET_VECTOR_ELT(temp, 1, edge_weights); SET_VECTOR_ELT(elist, i, temp); UNPROTECT(1); }/*THEN*/ else { /* save weights with edges as names. */ setAttrib(edge_weights, R_NamesSymbol, edges); SET_VECTOR_ELT(elist, i, edge_weights); }/*ELSE*/ UNPROTECT(2); }/*FOR*/ if (sub > 0) UNPROTECT(3); else UNPROTECT(2); return elist; }/*ARCS2WELIST*/
/* a single step of the optimized hill climbing (one arc addition/removal/reversal). */ SEXP hc_opt_step(SEXP amat, SEXP nodes, SEXP added, SEXP cache, SEXP reference, SEXP wlmat, SEXP blmat, SEXP nparents, SEXP maxp, SEXP debug) { int nnodes = length(nodes), i = 0, j = 0; int *am = NULL, *ad = NULL, *w = NULL, *b = NULL, debuglevel = isTRUE(debug); int counter = 0, update = 1, from = 0, to = 0, *path = NULL, *scratch = NULL; double *cache_value = NULL, temp = 0, max = 0, tol = MACHINE_TOL; double *mp = REAL(maxp), *np = REAL(nparents); SEXP bestop; /* allocate and initialize the return value (use FALSE as a canary value). */ PROTECT(bestop = allocVector(VECSXP, 3)); setAttrib(bestop, R_NamesSymbol, mkStringVec(3, "op", "from", "to")); /* allocate and initialize a dummy FALSE object. */ SET_VECTOR_ELT(bestop, 0, ScalarLogical(FALSE)); /* allocate buffers for c_has_path(). */ path = Calloc1D(nnodes, sizeof(int)); scratch = Calloc1D(nnodes, sizeof(int)); /* save pointers to the numeric/integer matrices. */ cache_value = REAL(cache); ad = INTEGER(added); am = INTEGER(amat); w = INTEGER(wlmat); b = INTEGER(blmat); if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0; i < nnodes * nnodes; i++) counter += ad[i]; Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to add one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (ad[CMC(i, j, nnodes)] == 0) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to add %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ /* this score delta is the best one at the moment, so add the arc if it * does not introduce cycles in the graph. */ if (temp - max > tol) { if (c_has_path(j, i, am, nnodes, nodes, FALSE, FALSE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not adding, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ adding %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "set", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes * nnodes; i++) counter += am[i] * (1 - w[i]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to remove one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* whitelisted arcs are not to be removed, ever. */ if (w[CMC(i, j, nnodes)] == 1) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)]; if (debuglevel > 0) { Rprintf(" > trying to remove %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (debuglevel > 0) Rprintf(" @ removing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "drop", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ if (debuglevel > 0) { /* count how may arcs are to be tested. */ for (i = 0, counter = 0; i < nnodes; i++) for (j = 0; j < nnodes; j++) counter += am[CMC(i, j, nnodes)] * (1 - b[CMC(j, i, nnodes)]); Rprintf("----------------------------------------------------------------\n"); Rprintf("* trying to reverse one of %d arcs.\n", counter); }/*THEN*/ for (i = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* nothing to see, move along. */ if (am[CMC(i, j, nnodes)] == 0) continue; /* don't reverse an arc if the one in the opposite direction is * blacklisted, ever. */ if (b[CMC(j, i, nnodes)] == 1) continue; /* do not reverse an arc if that means violating the limit on the * maximum number of parents. */ if (np[i] >= *mp) continue; /* retrieve the score delta from the cache. */ temp = cache_value[CMC(i, j, nnodes)] + cache_value[CMC(j, i, nnodes)]; /* nuke small values and negative zeroes. */ if (fabs(temp) < tol) temp = 0; if (debuglevel > 0) { Rprintf(" > trying to reverse %s -> %s.\n", NODE(i), NODE(j)); Rprintf(" > delta between scores for nodes %s %s is %lf.\n", NODE(i), NODE(j), temp); }/*THEN*/ if (temp - max > tol) { if (c_has_path(i, j, am, nnodes, nodes, FALSE, TRUE, path, scratch, FALSE)) { if (debuglevel > 0) Rprintf(" > not reversing, introduces cycles in the graph.\n"); continue; }/*THEN*/ if (debuglevel > 0) Rprintf(" @ reversing %s -> %s.\n", NODE(i), NODE(j)); /* update the return value. */ bestop_update(bestop, "reverse", NODE(i), NODE(j)); /* store the node indices to update the reference scores. */ from = i; to = j; /* both nodes' reference scores must be updated. */ update = 2; /* update the threshold score delta. */ max = temp; }/*THEN*/ }/*FOR*/ }/*FOR*/ /* update the reference scores. */ REAL(reference)[to] += cache_value[CMC(from, to, nnodes)]; if (update == 2) REAL(reference)[from] += cache_value[CMC(to, from, nnodes)]; Free1D(path); Free1D(scratch); UNPROTECT(1); return bestop; }/*HC_OPT_STEP*/
/* convert a set of neighbourhoods into an arc set. */ SEXP nbr2arcs(SEXP nbr) { int i = 0, j = 0, k = 0, narcs = 0; int length_names = 0; SEXP arcs, temp, names; /* get the names of the nodes. */ names = getAttrib(nbr, R_NamesSymbol); length_names = length(names); /* scan the structure to determine the number of arcs. */ for (i = 0; i < length_names; i++) { /* get the entry for the neighbours of the node.*/ temp = getListElement(nbr, (char *)CHAR(STRING_ELT(names, i))); temp = getListElement(temp, "nbr"); narcs += length(temp); }/*FOR*/ /* if there are no arcs, return an empty arc set. */ if (narcs == 0) { /* allocate an empty arc set. */ PROTECT(arcs = allocMatrix(STRSXP, 0, 2)); /* set the column names. */ setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to")); UNPROTECT(1); return arcs; }/*THEN*/ else { /* allocate the arc set. */ PROTECT(arcs = allocMatrix(STRSXP, narcs, 2)); /* set the column names. */ setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to")); }/*ELSE*/ /* rescan the structure to build the arc set. */ for (i = 0; i < length_names; i++) { /* get the entry for the neighbours of the node.*/ temp = getListElement(nbr, (char *)CHAR(STRING_ELT(names, i))); temp = getListElement(temp, "nbr"); for (j = 0; j < length(temp); j++) { SET_STRING_ELT(arcs, k, STRING_ELT(names, i)); SET_STRING_ELT(arcs, k + 1 * narcs , STRING_ELT(temp, j)); k++; }/*FOR*/ }/*FOR*/ UNPROTECT(1); return arcs; }/*NBR2ARCS*/
/* C-level interface to unique_arcs. */ SEXP c_unique_arcs(SEXP arcs, SEXP nodes, int warnlevel) { int i = 0, k = 0, nrow = 0, uniq_rows = 0; int *checklist = NULL; SEXP result, try, dup; /* the arc set is empty, nothing to do. */ if (length(arcs) == 0) return arcs; /* there really is a non-empty arc set, process it. */ nrow = length(arcs)/2; /* match the node labels in the arc set. */ PROTECT(try = arc_hash(arcs, nodes, FALSE, FALSE)); /* check which are duplicated. */ PROTECT(dup = duplicated(try, FALSE)); checklist = INTEGER(dup); /* count how many are not. */ for (i = 0; i < nrow; i++) if (checklist[i] == 0) uniq_rows++; /* if there is no duplicate arc simply return the original arc set. */ if (uniq_rows == nrow) { UNPROTECT(2); return arcs; }/*THEN*/ else { /* warn the user if told to do so. */ if (warnlevel > 0) warning("removed %d duplicate arcs.", nrow - uniq_rows); /* allocate and initialize the return value. */ PROTECT(result = allocMatrix(STRSXP, uniq_rows, 2)); /* store the correct arcs in the return value. */ for (i = 0, k = 0; i < nrow; i++) { if (checklist[i] == 0) { SET_STRING_ELT(result, k, STRING_ELT(arcs, i)); SET_STRING_ELT(result, k + uniq_rows, STRING_ELT(arcs, i + nrow)); k++; }/*THEN*/ }/*FOR*/ }/*ELSE*/ /* allocate, initialize and set the column names. */ setDimNames(result, R_NilValue, mkStringVec(2, "from", "to")); UNPROTECT(3); return result; }/*C_UNIQUE_ARCS*/
/* return the skeleton of a DAG/PDAG. */ SEXP dag2ug(SEXP bn, SEXP moral, SEXP debug) { int i = 0, j = 0, k = 0, nnodes = 0, narcs = 0, row = 0; int debuglevel = isTRUE(debug), *moralize = LOGICAL(moral); int *nparents = NULL, *nnbr = NULL; SEXP node_data, current, nodes, result, temp; /* get the nodes' data. */ node_data = getListElement(bn, "nodes"); nnodes = length(node_data); PROTECT(nodes = getAttrib(node_data, R_NamesSymbol)); /* allocate and initialize parents' and neighbours' counters. */ nnbr = Calloc1D(nnodes, sizeof(int)); if (*moralize > 0) nparents = Calloc1D(nnodes, sizeof(int)); /* first pass: count neighbours, parents and resulting arcs. */ for (i = 0; i < nnodes; i++) { /* get the number of neighbours. */ current = VECTOR_ELT(node_data, i); nnbr[i] = length(getListElement(current, "nbr")); /* update the number of arcs to be returned. */ if (*moralize > 0) { /* get also the number of parents, needed to account for the arcs added * for their moralization. */ nparents[i] = length(getListElement(current, "parents")); narcs += nnbr[i] + nparents[i] * (nparents[i] - 1); }/*THEN*/ else { narcs += nnbr[i]; }/*ELSE*/ if (debuglevel > 0) { if (*moralize > 0) { Rprintf("* scanning node %s, found %d neighbours and %d parents.\n", NODE(i), nnbr[i], nparents[i]); Rprintf(" > adding %d arcs, for a total of %d.\n", nnbr[i] + nparents[i] * (nparents[i] - 1), narcs); }/*THEN*/ else { Rprintf("* scanning node %s, found %d neighbours.\n", NODE(i), nnbr[i]); Rprintf(" > adding %d arcs, for a total of %d.\n", nnbr[i], narcs); }/*ELSE*/ }/*THEN*/ }/*FOR*/ /* allocate the return value. */ PROTECT(result = allocMatrix(STRSXP, narcs, 2)); /* allocate and set the column names. */ setDimNames(result, R_NilValue, mkStringVec(2, "from", "to")); /* second pass: fill the return value. */ for (i = 0; i < nnodes; i++) { /* get to the current node. */ current = VECTOR_ELT(node_data, i); /* get the neighbours. */ temp = getListElement(current, "nbr"); for (j = 0; j < nnbr[i]; j++) { SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(nodes, i)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j)); row++; }/*FOR*/ /* if we are not creating a moral graph we are done with this node. */ if (*moralize == 0) continue; /* get the parents. */ temp = getListElement(current, "parents"); for (j = 0; j < nparents[i]; j++) { for (k = j+1; k < nparents[i]; k++) { SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, k)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j)); row++; SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, j)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, k)); row++; }/*FOR*/ }/*FOR*/ }/*FOR*/ Free1D(nnbr); if (*moralize > 0) { /* be really sure not to return duplicate arcs in moral graphs when * shielded parents are present (the "shielding" nodes are counted * twice). */ result = c_unique_arcs(result, nodes, FALSE); Free1D(nparents); }/*THEN*/ UNPROTECT(2); return result; }/*DAG2UG*/
/* arc strength (confidence) and direction coefficients. */ SEXP bootstrap_arc_coefficients(SEXP prob, SEXP nodes) { int i = 0, j = 0, k = 0, narcs = 0, nnodes = length(nodes); double *p = NULL, *s = NULL, *d = NULL, tol = MACHINE_TOL;; SEXP res, rownames, from, to, str, dir; /* compute the dimension of the arcs set. */ narcs = nnodes * (nnodes - 1); /* allocate and initialize the various columns. */ PROTECT(from = allocVector(STRSXP, narcs)); PROTECT(to = allocVector(STRSXP, narcs)); PROTECT(str = allocVector(REALSXP, narcs)); PROTECT(dir = allocVector(REALSXP, narcs)); /* dereference the probability matrix and the coefficients once and * for all. */ p = REAL(prob); s = REAL(str); d = REAL(dir); /* fill in the coefficients. */ for (i = 0, k = 0; i < nnodes; i++) { for (j = 0; j < nnodes; j++) { /* "from" must differ from "to". */ if (i == j) continue; /* set the labels of the incident nodes. */ SET_STRING_ELT(from, k, STRING_ELT(nodes, i)); SET_STRING_ELT(to, k, STRING_ELT(nodes, j)); /* compute arc strength and direction confidence. */ s[k] = p[CMC(i, j, nnodes)] + p[CMC(j, i, nnodes)]; d[k] = (s[k] == 0 ? 0 : p[CMC(i, j, nnodes)] / s[k]); /* sanitize out-of-boundary values arising from floating point errors. */ s[k] = (s[k] < tol) ? 0 : s[k]; s[k] = (s[k] > 1 - tol) ? 1 : s[k]; d[k] = (d[k] < tol) ? 0 : d[k]; d[k] = (d[k] > 1 - tol) ? 1 : d[k]; /* increment the arc counter. */ k++; }/*FOR*/ }/*FOR*/ /* allocate and initialize the return value. */ PROTECT(res = allocVector(VECSXP, 4)); /* allocate, initialize and set the class name. */ setAttrib(res, R_ClassSymbol, mkString("data.frame")); /* allocate, initialize and set row names. */ PROTECT(rownames = allocVector(INTSXP, narcs)); for (i = 0; i < narcs; i++) INTEGER(rownames)[i] = i + 1; setAttrib(res, R_RowNamesSymbol, rownames); /* set column names. */ setAttrib(res, R_NamesSymbol, mkStringVec(4, "from", "to", "strength", "direction")); /* attach the four columns. */ SET_VECTOR_ELT(res, 0, from); SET_VECTOR_ELT(res, 1, to); SET_VECTOR_ELT(res, 2, str); SET_VECTOR_ELT(res, 3, dir); UNPROTECT(6); return res; }/*BOOTSTRAP_ARC_COEFFICIENTS*/
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children, int debuglevel) { int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0; int nbeta = length(VECTOR_ELT(beta, 0)); int *temp = NULL, *aid = INTEGER(VECTOR_ELT(beta, 2)); double prior = 0, result = 0; double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3)); short int *adjacent = NULL; SEXP nodes, try; /* get the node labels. */ nodes = getAttrib(beta, BN_NodesSymbol); nnodes = length(nodes); /* match the target node. */ PROTECT(try = match(nodes, target, 0)); t = INT(try); UNPROTECT(1); /* find out which nodes are parents and which nodes are children. */ adjacent = allocstatus(nnodes); PROTECT(try = match(nodes, parents, 0)); temp = INTEGER(try); for (i = 0; i < length(try); i++) adjacent[temp[i] - 1] = PARENT; UNPROTECT(1); PROTECT(try = match(nodes, children, 0)); temp = INTEGER(try); for (i = 0; i < length(try); i++) adjacent[temp[i] - 1] = CHILD; UNPROTECT(1); /* prior probabilities table lookup. */ for (i = t + 1; i <= nnodes; i++) { /* compute the arc id. */ cur_arc = UPTRI3(t, i, nnodes); /* look up the prior probability. */ for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) { /* arcs are ordered, so we can stop early in the lookup. */ if (aid[k] > cur_arc) break; if (aid[k] == cur_arc) { switch(adjacent[i - 1]) { case PARENT: prior = bkwd[k]; break; case CHILD: prior = fwd[k]; break; default: prior = fmax2(0, 1 - bkwd[k] - fwd[k]); }/*SWITCH*/ break; }/*THEN*/ }/*FOR*/ if (debuglevel > 0) { switch(adjacent[i - 1]) { case PARENT: Rprintf(" > found arc %s -> %s, prior pobability is %lf.\n", NODE(i - 1), NODE(t - 1), prior); break; case CHILD: Rprintf(" > found arc %s -> %s, prior probability is %lf.\n", NODE(t - 1), NODE(i - 1), prior); break; default: Rprintf(" > no arc between %s and %s, prior probability is %lf.\n", NODE(t - 1), NODE(i - 1), prior); }/*SWITCH*/ }/*THEN*/ /* move to log-scale and divide by the non-informative log(1/3), so that * the contribution of each arc whose prior has not been specified by the * user is zero; overflow is likely otherwise. */ result += log(prior / ((double)1/3)); }/*FOR*/ return result; }/*CASTELO_PRIOR*/ /* complete a prior as per Castelo & Siebes. */ SEXP castelo_completion(SEXP prior, SEXP nodes, SEXP learning) { int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = length(nodes); int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL; double *d1 = NULL, *d2 = NULL, *p = NULL; SEXP df, arc_id, undirected, a1, a2, match1, match2, prob; SEXP result, from, to, nid, dir1, dir2; /* compute numeric IDs for the arcs. */ a1 = VECTOR_ELT(prior, 0); a2 = VECTOR_ELT(prior, 1); narcs1 = length(a1); PROTECT(match1 = match(nodes, a1, 0)); PROTECT(match2 = match(nodes, a2, 0)); m1 = INTEGER(match1); m2 = INTEGER(match2); PROTECT(arc_id = allocVector(INTSXP, narcs1)); aid = INTEGER(arc_id); c_arc_hash(narcs1, nnodes, m1, m2, aid, NULL, FALSE); /* duplicates correspond to undirected arcs. */ PROTECT(undirected = dupe(arc_id)); und = INTEGER(undirected); /* extract the components from the prior. */ prob = VECTOR_ELT(prior, 2); p = REAL(prob); /* count output arcs. */ for (i = 0; i < narcs1; i++) narcs2 += 2 - und[i]; narcs2 /= 2; /* allocate the columns of the return value. */ PROTECT(from = allocVector(STRSXP, narcs2)); PROTECT(to = allocVector(STRSXP, narcs2)); PROTECT(nid = allocVector(INTSXP, narcs2)); id = INTEGER(nid); PROTECT(dir1 = allocVector(REALSXP, narcs2)); d1 = REAL(dir1); PROTECT(dir2 = allocVector(REALSXP, narcs2)); d2 = REAL(dir2); /* sort the strength coefficients. */ poset = alloc1dcont(narcs1); for (k = 0; k < narcs1; k++) poset[k] = k; R_qsort_int_I(aid, poset, 1, narcs1); for (i = 0, k = 0; i < narcs1; i++) { cur = poset[i]; #define ASSIGN(A1, A2, D1, D2) \ SET_STRING_ELT(from, k, STRING_ELT(A1, cur)); \ SET_STRING_ELT(to, k, STRING_ELT(A2, cur)); \ id[k] = aid[i]; \ D1[k] = p[cur]; \ if ((und[cur] == TRUE) && (i < narcs1 - 1)) \ D2[k] = p[poset[++i]]; \ else \ D2[k] = (1 - D1[k])/2; /* copy the node labels. */ if (m1[cur] < m2[cur]) { ASSIGN(a1, a2, d1, d2); }/*THEN*/ else { ASSIGN(a2, a1, d2, d1); }/*ELSE*/ /* check the probabilities do not exceed 1; fail only for large errors. */ if (d1[k] + d2[k] > 1) { if (d1[k] + d2[k] < 1 + 2 * MACHINE_TOL) { d1[k] = d1[k] / (d1[k] + d2[k]); d2[k] = d2[k] / (d1[k] + d2[k]); }/*THEN*/ else { UNPROTECT(9); error("the probabilities for arc %s -> %s sum to %lf.", CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]); }/*ELSE*/ }/*THEN*/ /* bound the probability of not including an arc away from zero, structure * learning otherwise fails when starting from the empty graph and gets * stuck very easily in general. */ if (isTRUE(learning) && (fabs(1 - d1[k] - d2[k]) < MACHINE_TOL)) { d1[k] = d1[k] - MACHINE_TOL; d2[k] = d2[k] - MACHINE_TOL; }/*THEN*/ /* move to the next arc. */ k++; }/*FOR*/ /* set up the return value. */ PROTECT(result = allocVector(VECSXP, 5)); SET_VECTOR_ELT(result, 0, from); SET_VECTOR_ELT(result, 1, to); SET_VECTOR_ELT(result, 2, nid); SET_VECTOR_ELT(result, 3, dir1); SET_VECTOR_ELT(result, 4, dir2); setAttrib(result, R_NamesSymbol, mkStringVec(5, "from", "to", "aid", "fwd", "bkwd")); PROTECT(df = minimal_data_frame(result)); UNPROTECT(11); return df; }/*CASTELO_COMPLETION*/
/* check neighbourhood sets and markov blanets for consistency.. */ SEXP bn_recovery(SEXP bn, SEXP strict, SEXP mb, SEXP filter, SEXP debug) { int i = 0, j = 0, k = 0, n = 0, counter = 0; short int *checklist = NULL, err = 0; int debuglevel = isTRUE(debug), checkmb = isTRUE(mb), *flt = INTEGER(filter); SEXP temp, temp2, nodes, elnames = NULL, fixed; /* get the names of the nodes. */ nodes = getAttrib(bn, R_NamesSymbol); n = length(nodes); /* allocate and initialize the checklist. */ checklist = allocstatus(UPTRI_MATRIX(n)); if (debuglevel > 0) { Rprintf("----------------------------------------------------------------\n"); if (checkmb) Rprintf("* checking consistency of markov blankets.\n"); else Rprintf("* checking consistency of neighbourhood sets.\n"); }/*THEN*/ /* scan the structure to determine the number of arcs. */ for (i = 0; i < n; i++) { if (debuglevel > 0) Rprintf(" > checking node %s.\n", NODE(i)); /* get the entry for the (neighbours|elements of the markov blanket) of the node.*/ temp = getListElement(bn, (char *)NODE(i)); if (!checkmb) temp = getListElement(temp, "nbr"); /* check each element of the array and identify which variable it corresponds to. */ for (j = 0; j < length(temp); j++) { for (k = 0; k < n; k++) { /* increment the right element of checklist. */ if (!strcmp(NODE(k), (char *)CHAR(STRING_ELT(temp, j)))) checklist[UPTRI(i + 1, k + 1, n)]++; }/*FOR*/ }/*FOR*/ }/*FOR*/ /* if A is a neighbour of B, B is a neighbour of A; therefore each entry in * the checklist array must be equal to either zero (if the corresponding * nodes are not neighbours) or two (if the corresponding nodes are neighbours). * Any other value (typically one) is caused by an incorrect (i.e. asymmetric) * neighbourhood structure. The same logic holds for the markov blankets. */ for (i = 0; i < n; i++) for (j = i; j < n; j++) { if ((checklist[UPTRI(i + 1, j + 1, n)] != 0) && (checklist[UPTRI(i + 1, j + 1, n)] != 2)) { if (debuglevel > 0) { if (checkmb) Rprintf("@ asymmetry in the markov blankets for %s and %s.\n", NODE(i), NODE(j)); else Rprintf("@ asymmetry in the neighbourhood sets for %s and %s.\n", NODE(i), NODE(j)); }/*THEN*/ err = 1; }/*THEN*/ }/*FOR*/ /* no need to go on if the (neighbourhood sets|markov blankets) are symmetric; * otherwise throw either an error or a warning according to the value of the * strict parameter. */ if (!err) { return bn; }/*THEN*/ else if (isTRUE(strict)) { if (checkmb) error("markov blankets are not symmetric.\n"); else error("neighbourhood sets are not symmetric.\n"); }/*THEN*/ /* build a correct structure to return. */ PROTECT(fixed = allocVector(VECSXP, n)); setAttrib(fixed, R_NamesSymbol, nodes); /* allocate colnames. */ if (!checkmb) PROTECT(elnames = mkStringVec(2, "mb", "nbr")); for (i = 0; i < n; i++) { if (!checkmb) { /* allocate the "mb" and "nbr" elements of the node. */ PROTECT(temp = allocVector(VECSXP, 2)); SET_VECTOR_ELT(fixed, i, temp); setAttrib(temp, R_NamesSymbol, elnames); /* copy the "mb" part from the old structure. */ temp2 = getListElement(bn, (char *)NODE(i)); temp2 = getListElement(temp2, "mb"); SET_VECTOR_ELT(temp, 0, temp2); }/*THEN*/ /* rescan the checklist. */ for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] >= *flt) if (i != j) counter++; /* allocate and fill the "nbr" element. */ PROTECT(temp2 = allocVector(STRSXP, counter)); for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] == *flt) if (i != j) SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j)); if (checkmb) { SET_VECTOR_ELT(fixed, i, temp2); UNPROTECT(1); }/*THEN*/ else { SET_VECTOR_ELT(temp, 1, temp2); UNPROTECT(2); }/*ELSE*/ }/*FOR*/ if (checkmb) UNPROTECT(1); else UNPROTECT(2); return fixed; }/*BN_RECOVERY*/