void build_tau(double **data, double *tau, int *ncols, int *nrows, int *imaginary, double *phi) { int i = 0, j = 0, res_ncols = *ncols + 1; double temp = 0; double *mean = NULL, *mat = NULL; /* allocate mean vector and covariance matrix. */ mean = alloc1dreal(*ncols); mat = alloc1dreal((*ncols) * (*ncols)); /* compute the mean values. */ for (i = 0; i < *ncols; i++) { for (j = 0 ; j < *nrows; j++) mean[i] += data[i][j]; mean[i] /= (*nrows); }/*FOR*/ /* compute the covariance matrix... */ c_covmat(data, mean, ncols, nrows, mat); /* ... multiply it by the phi coefficient... */ for (i = 0; i < *ncols; i++) for (j = 0; j < *ncols; j++) mat[CMC(i, j, *ncols)] *= (*phi); /* ... compute the pseudoinverse... */ c_ginv(mat, ncols, mat); /* ... and store it in the bottom-right corner of the tau matrix. */ for (i = 1; i < res_ncols; i++) for (j = 1; j < res_ncols; j++) tau[CMC(i, j, res_ncols)] = mat[CMC(i - 1, j - 1, *ncols)]; /* fill the top-right and bottom-left corners. */ for (i = 1; i < *ncols + 1; i++) { temp = 0; for (j = 0; j < *ncols; j++) temp += mean[j] * mat[CMC(j, i - 1, *ncols)]; tau[CMC(i, 0, res_ncols)] = tau[CMC(0, i, res_ncols)] = -temp; }/*FOR*/ /* fill the top-left corner. */ for (i = 1; i < res_ncols; i++) tau[CMC(0, 0, res_ncols)] += - mean[i - 1] * tau[CMC(i, 0, res_ncols)]; tau[CMC(0, 0, res_ncols)] += 1/((double) *imaginary); /* perform the final (pseudo)inversion. */ c_ginv(tau, &res_ncols, tau); }/*BUILD_TAU*/
/* shrinked mutual information, to be used for the asymptotic test. */ SEXP shmi(SEXP x, SEXP y, SEXP lx, SEXP ly, SEXP length) { int i = 0, j = 0, k = 0; double **n = NULL, *ni = NULL, *nj = NULL; int *llx = INTEGER(lx), *lly = INTEGER(ly), *num = INTEGER(length); int *xx = INTEGER(x), *yy = INTEGER(y); double lambda = 0, target = 1/(double)((*llx) * (*lly)); double *res = NULL; SEXP result; /* allocate and initialize result to zero. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* initialize the contingency table and the marginal frequencies. */ n = alloc2dreal(*llx, *lly); ni = alloc1dreal(*llx); nj = alloc1dreal(*lly); /* compute the joint frequency of x and y. */ for (k = 0; k < *num; k++) { n[xx[k] - 1][yy[k] - 1]++; }/*FOR*/ /* estimate the optimal lambda for the data. */ _mi_lambda((double *)n, &lambda, &target, num, llx, lly, NULL); /* switch to the probability scale and shrink the estimates. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) n[i][j] = lambda * target + (1 - lambda) * n[i][j] / (*num); /* compute the marginals. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) { ni[i] += n[i][j]; nj[j] += n[i][j]; }/*FOR*/ /* compute the mutual information from the joint and marginal frequencies. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) { if (n[i][j] != 0) *res += n[i][j] * log(n[i][j] / (ni[i] * nj[j])); }/*FOR*/ UNPROTECT(1); return result; }/*SHMI*/
/* get the number of parameters of the whole network (discrete case). */ SEXP nparams_dnet(SEXP graph, SEXP data, SEXP real, SEXP debug) { int i = 0, j = 0, nnodes = 0; int *index = NULL, *r = LOGICAL(real), *debuglevel = LOGICAL(debug); double node_params = 0; double *res = NULL, *nlevels = NULL; SEXP nodes, node_data, parents, try, result; /* get nodes' number and data. */ node_data = getListElement(graph, "nodes"); nodes = getAttrib(node_data, R_NamesSymbol); nnodes = LENGTH(node_data); /* get the level count for each node. */ nlevels = alloc1dreal(nnodes); for (i = 0; i < nnodes; i++) nlevels[i] = NLEVELS2(data, i); /* allocate and initialize the return value. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); res[0] = 0; /* for each node... */ for (i = 0; i < nnodes; i++) { /* reset the parameter counter. */ node_params = 1; /* match the parents of the node. */ parents = getListElement(VECTOR_ELT(node_data, i), "parents"); PROTECT(try = match(nodes, parents, 0)); index = INTEGER(try); /* compute the number of configurations. */ for (j = 0; j < LENGTH(try); j++) node_params *= nlevels[index[j] - 1]; UNPROTECT(1); /* multiply by the number of free parameters. */ if (*r > 0) node_params *= nlevels[i] - 1; else node_params *= nlevels[i]; if (*debuglevel > 0) Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params); /* update the return value. */ res[0] += node_params; }/*FOR*/ UNPROTECT(1); return result; }/*NPARAMS_DNET*/
/* unconditional discrete sampling. */ void rbn_discrete_root(SEXP result, int cur, SEXP cpt, int *num, int ordinal, SEXP fixed) { int np = LENGTH(cpt), *gen = NULL, *workplace = NULL; double *p = NULL; SEXP generated, class, lvls; /* get the levels of the curent variable .*/ lvls = VECTOR_ELT(getAttrib(cpt, R_DimNamesSymbol), 0); /* allocate the memory for the generated observations. */ PROTECT(generated = allocVector(INTSXP, *num)); gen = INTEGER(generated); if (fixed != R_NilValue) { int constant = INTEGER(match(lvls, fixed, 0))[0]; for (int i = 0; i < *num; i++) gen[i] = constant; }/*THEN*/ else { workplace = alloc1dcont(np); /* duplicate the probability table to save the original copy from tampering. */ p = alloc1dreal(np); memcpy(p, REAL(cpt), np * sizeof(double)); /* perform the random sampling. */ ProbSampleReplace(np, p, workplace, *num, gen); }/*ELSE*/ /* set up all the attributes for the newly generated observations. */ setAttrib(generated, R_LevelsSymbol, lvls); if (ordinal) { PROTECT(class = allocVector(STRSXP, 2)); SET_STRING_ELT(class, 0, mkChar("ordered")); SET_STRING_ELT(class, 1, mkChar("factor")); }/*THEN*/ else {
/* predict the value of a discrete node without parents. */ SEXP dpred(SEXP fitted, SEXP data, SEXP debug) { int i = 0, nmax = 0, ndata = LENGTH(data), length = 0; int *res = NULL, *debuglevel = LOGICAL(debug), *iscratch = NULL, *maxima = NULL; double *prob = NULL, *dscratch = NULL, *buf = NULL; SEXP ptab, result, tr_levels = getAttrib(data, R_LevelsSymbol); /* get the probabilities of the multinomial distribution. */ ptab = getListElement(fitted, "prob"); length = LENGTH(ptab); prob = REAL(ptab); /* create the vector of indexes. */ iscratch = alloc1dcont(length); for (i = 0; i < length; i++) iscratch[i] = i + 1; /* create a scratch copy of the array. */ buf = alloc1dreal(length); dscratch = alloc1dreal(length); memcpy(dscratch, prob, length * sizeof(double)); /* allocate the array for the indexes of the maxima. */ maxima = alloc1dcont(length); /* find out the mode(s). */ all_max(dscratch, length, maxima, &nmax, iscratch, buf); /* allocate and initialize the return value. */ PROTECT(result = allocVector(INTSXP, ndata)); res = INTEGER(result); if (nmax == 1) { /* copy the index of the mode in the return value. */ for (i = 0; i < ndata; i++) res[i] = maxima[0]; if (*debuglevel > 0) { if (res[0] == NA_INTEGER) Rprintf(" > prediction for all observations is NA with probabilities:\n"); else Rprintf(" > prediction for all observations is '%s' with probabilities:\n", CHAR(STRING_ELT(tr_levels, res[0] - 1))); Rprintf(" "); for (i = 0; i < LENGTH(ptab); i++) Rprintf(" %lf", prob[i]); Rprintf("\n"); }/*THEN*/ }/*THEN*/ else { /* break ties: sample with replacement from all the maxima. */ GetRNGstate(); SampleReplace(ndata, nmax, res, maxima); PutRNGstate(); if (*debuglevel > 0) { Rprintf(" > there are %d levels tied for prediction, applying tie breaking.\n", nmax); Rprintf(" > tied levels are:"); for (i = 0; i < nmax; i++) Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[i] - 1))); Rprintf(".\n"); }/*THEN*/ }/*ELSE*/ /* copy the labels and the class from the input data. */ setAttrib(result, R_LevelsSymbol, tr_levels); setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol)); UNPROTECT(1); return result; }/*DPRED*/
/* predict the value of the training variable in a naive Bayes or Tree-Augmented * naive Bayes classifier. */ SEXP naivepred(SEXP fitted, SEXP data, SEXP parents, SEXP training, SEXP prior, SEXP prob, SEXP debug) { int i = 0, j = 0, k = 0, n = 0, nvars = LENGTH(fitted), nmax = 0, tr_nlevels = 0; int *res = NULL, **ex = NULL, *ex_nlevels = NULL; int idx = 0, *tr_id = INTEGER(training); int *iscratch = NULL, *maxima = NULL, *prn = NULL, *debuglevel = LOGICAL(debug); int *include_prob = LOGICAL(prob); double **cpt = NULL, *pr = NULL, *scratch = NULL, *buf = NULL, *pt = NULL; double sum = 0; SEXP class, temp, tr, tr_levels, result, nodes, probtab, dimnames; /* cache the node labels. */ nodes = getAttrib(fitted, R_NamesSymbol); /* cache the pointers to all the variables. */ ex = (int **) alloc1dpointer(nvars); ex_nlevels = alloc1dcont(nvars); for (i = 0; i < nvars; i++) { temp = VECTOR_ELT(data, i); ex[i] = INTEGER(temp); ex_nlevels[i] = NLEVELS(temp); }/*FOR*/ /* get the training variable and its levels. */ n = LENGTH(VECTOR_ELT(data, 0)); tr = getListElement(VECTOR_ELT(fitted, *tr_id - 1), "prob"); tr_levels = VECTOR_ELT(getAttrib(tr, R_DimNamesSymbol), 0); tr_nlevels = LENGTH(tr_levels); /* get the prior distribution. */ pr = REAL(prior); if (*debuglevel > 0) { Rprintf("* the prior distribution for the target variable is:\n"); PrintValue(prior); }/*THEN*/ /* allocate the scratch space used to compute posterior probabilities. */ scratch = alloc1dreal(tr_nlevels); buf = alloc1dreal(tr_nlevels); /* cache the pointers to the conditional probability tables. */ cpt = (double **) alloc1dpointer(nvars); for (i = 0; i < nvars; i++) cpt[i] = REAL(getListElement(VECTOR_ELT(fitted, i), "prob")); /* dereference the parents' vector. */ prn = INTEGER(parents); /* create the vector of indexes. */ iscratch = alloc1dcont(tr_nlevels); /* allocate the array for the indexes of the maxima. */ maxima = alloc1dcont(tr_nlevels); /* allocate the return value. */ PROTECT(result = allocVector(INTSXP, n)); res = INTEGER(result); /* allocate and initialize the table of the posterior probabilities. */ if (*include_prob > 0) { PROTECT(probtab = allocMatrix(REALSXP, tr_nlevels, n)); pt = REAL(probtab); memset(pt, '\0', n * tr_nlevels * sizeof(double)); }/*THEN*/ /* initialize the random seed, just in case we need it for tie breaking. */ GetRNGstate(); /* for each observation... */ for (i = 0; i < n; i++) { /* ... reset the scratch space and the indexes array... */ for (k = 0; k < tr_nlevels; k++) { scratch[k] = log(pr[k]); iscratch[k] = k + 1; }/*FOR*/ if (*debuglevel > 0) Rprintf("* predicting the value of observation %d.\n", i + 1); /* ... and for each conditional probability table... */ for (j = 0; j < nvars; j++) { /* ... skip the training variable... */ if (*tr_id == j + 1) continue; /* ... (this is the root node of the Chow-Liu tree) ... */ if (prn[j] == NA_INTEGER) { /* ... and for each row of the conditional probability table... */ for (k = 0; k < tr_nlevels; k++) { if (*debuglevel > 0) { Rprintf(" > node %s: picking cell %d (%d, %d) from the CPT (p = %lf).\n", NODE(j), CMC(ex[j][i] - 1, k, ex_nlevels[j]), ex[j][i], k + 1, cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]); }/*THEN*/ /* ... update the posterior probability. */ scratch[k] += log(cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]); }/*FOR*/ }/*THEN*/ else { /* ... and for each row of the conditional probability table... */ for (k = 0; k < tr_nlevels; k++) { /* (the first dimension corresponds to the current node [X], the second * to the training node [Y], the third to the only parent of the current * node [Z]; CMC coordinates are computed as X + Y * NX + Z * NX * NY. */ idx = (ex[j][i] - 1) + k * ex_nlevels[j] + (ex[prn[j] - 1][i] - 1) * ex_nlevels[j] * tr_nlevels; if (*debuglevel > 0) { Rprintf(" > node %s: picking cell %d (%d, %d, %d) from the CPT (p = %lf).\n", NODE(j), idx, ex[j][i], k + 1, ex[prn[j] - 1][i], cpt[j][idx]); }/*THEN*/ /* ... update the posterior probability. */ scratch[k] += log(cpt[j][idx]); }/*FOR*/ }/*ELSE*/ }/*FOR*/ /* find out the mode(s). */ all_max(scratch, tr_nlevels, maxima, &nmax, iscratch, buf); /* compute the posterior probabilities on the right scale, to attach them * to the return value. */ if (*include_prob) { /* copy the log-probabilities from scratch. */ memcpy(pt + i * tr_nlevels, scratch, tr_nlevels * sizeof(double)); /* transform log-probabilitiees into plain probabilities. */ for (k = 0, sum = 0; k < tr_nlevels; k++) sum += pt[i * tr_nlevels + k] = exp(pt[i * tr_nlevels + k] - scratch[maxima[0] - 1]); /* rescale them to sum up to 1. */ for (k = 0; k < tr_nlevels; k++) pt[i * tr_nlevels + k] /= sum; }/*THEN*/ if (nmax == 1) { res[i] = maxima[0]; if (*debuglevel > 0) { Rprintf(" @ prediction for observation %d is '%s' with (log-)posterior:\n", i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1))); Rprintf(" "); for (k = 0; k < tr_nlevels; k++) Rprintf(" %lf", scratch[k]); Rprintf("\n"); }/*THEN*/ }/*THEN*/ else { /* break ties: sample with replacement from all the maxima. */ SampleReplace(1, nmax, res + i, maxima); if (*debuglevel > 0) { Rprintf(" @ there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax, i + 1); Rprintf(" "); for (k = 0; k < tr_nlevels; k++) Rprintf(" %lf", scratch[k]); Rprintf("\n"); Rprintf(" @ tied levels are:"); for (k = 0; k < nmax; k++) Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[k] - 1))); Rprintf(".\n"); }/*THEN*/ }/*ELSE*/ }/*FOR*/ /* save the state of the random number generator. */ PutRNGstate(); /* add back the attributes and the class to the return value. */ PROTECT(class = allocVector(STRSXP, 1)); SET_STRING_ELT(class, 0, mkChar("factor")); setAttrib(result, R_LevelsSymbol, tr_levels); setAttrib(result, R_ClassSymbol, class); if (*include_prob > 0) { /* set the levels of the taregt variable as rownames. */ PROTECT(dimnames = allocVector(VECSXP, 2)); SET_VECTOR_ELT(dimnames, 0, tr_levels); setAttrib(probtab, R_DimNamesSymbol, dimnames); /* add the posterior probabilities to the return value. */ setAttrib(result, install("prob"), probtab); UNPROTECT(4); }/*THEN*/ else { UNPROTECT(2); }/*ELSE*/ return result; }/*NAIVEPRED*/
/* predict the value of a discrete node with one or more parents. */ SEXP cdpred(SEXP fitted, SEXP data, SEXP parents, SEXP debug) { int i = 0, k = 0, ndata = LENGTH(data), nrows = 0, ncols = 0; int *configs = INTEGER(parents), *debuglevel = LOGICAL(debug); int *iscratch = NULL, *maxima = NULL, *nmax = NULL, *res = NULL; double *prob = NULL, *dscratch = NULL, *buf = NULL; SEXP temp, result, tr_levels = getAttrib(data, R_LevelsSymbol); /* get the probabilities of the multinomial distribution. */ temp = getListElement(fitted, "prob"); nrows = INT(getAttrib(temp, R_DimSymbol)); ncols = LENGTH(temp) / nrows; prob = REAL(temp); /* create the vector of indexes. */ iscratch = alloc1dcont(nrows); /* create a scratch copy of the array. */ buf = alloc1dreal(nrows); dscratch = alloc1dreal(nrows * ncols); memcpy(dscratch, prob, nrows * ncols * sizeof(double)); /* allocate the array for the indexes of the maxima. */ maxima = alloc1dcont(nrows * ncols); /* allocate the maxima counters. */ nmax = alloc1dcont(ncols); /* get the mode for each configuration. */ for (i = 0; i < ncols; i++) { /* initialize the vector of indexes. */ for (k = 0; k < nrows; k++) iscratch[k] = k + 1; /* find out the mode(s). */ all_max(dscratch + i * nrows, nrows, maxima + i * nrows, nmax + i, iscratch, buf); }/*FOR*/ /* allocate and initialize the return value. */ PROTECT(result = allocVector(INTSXP, ndata)); res = INTEGER(result); /* initialize the random seed, just in case we need it for tie breaking. */ GetRNGstate(); /* copy the index of the mode in the return value. */ for (i = 0; i < ndata; i++) { if (nmax[configs[i] - 1] == 1) { res[i] = maxima[CMC(0, configs[i] - 1, nrows)]; if (*debuglevel > 0) { if (res[i] == NA_INTEGER) Rprintf(" > prediction for observation %d is NA with probabilities:\n"); else Rprintf(" > prediction for observation %d is '%s' with probabilities:\n", i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1))); Rprintf(" "); for (int k = 0; k < nrows; k++) Rprintf(" %lf", (prob + nrows * (configs[i] - 1))[k]); Rprintf("\n"); }/*THEN*/ }/*THEN*/ else { /* break ties: sample with replacement from all the maxima. */ SampleReplace(1, nmax[configs[i] - 1], res + i, maxima + (configs[i] - 1) * nrows); if (*debuglevel > 0) { Rprintf(" > there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax[configs[i] - 1], i + 1); Rprintf(" > tied levels are:"); for (k = 0; k < nmax[configs[i] - 1]; k++) Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[CMC(k, configs[i] - 1, nrows)] - 1))); Rprintf(".\n"); }/*THEN*/ }/*ELSE*/ }/*FOR*/ /* save the state of the random number generator. */ PutRNGstate(); /* copy the labels and the class from the input data. */ setAttrib(result, R_LevelsSymbol, tr_levels); setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol)); UNPROTECT(1); return result; }/*CDPRED*/
/* shrinked conditional mutual information, to be used for the asymptotic * test. */ SEXP shcmi(SEXP x, SEXP y, SEXP z, SEXP lx, SEXP ly, SEXP lz, SEXP length) { int i = 0, j = 0, k = 0; double ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL; int *llx = INTEGER(lx), *lly = INTEGER(ly), *llz = INTEGER(lz); int *num = INTEGER(length); int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z); double lambda = 0, target = 1/(double)((*llx) * (*lly) * (*llz)); double *res = NULL; SEXP result; /* allocate and initialize result to zero. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* initialize the contingency table and the marginal frequencies. */ n = alloc3dreal(*llx, *lly, *llz); ni = alloc2dreal(*llx, *llz); nj = alloc2dreal(*lly, *llz); nk = alloc1dreal(*llz); /* compute the joint frequency of x, y, and z. */ for (k = 0; k < *num; k++) { n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++; }/*FOR*/ /* estimate the optimal lambda for the data. */ _mi_lambda((double *)n, &lambda, &target, num, llx, lly, llz); /* switch to the probability scale and shrink the estimates. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) for (k = 0; k < *llz; k++) n[i][j][k] = lambda * target + (1 - lambda) * n[i][j][k] / (*num); /* compute the marginals. */ for (i = 0; i < *llx; i++) for (j = 0; j < *lly; j++) for (k = 0; k < *llz; k++) { ni[i][k] += n[i][j][k]; nj[j][k] += n[i][j][k]; nk[k] += n[i][j][k]; }/*FOR*/ for (k = 0; k < *llz; k++) { /* check each level of the conditioning variable to avoid (again) * "divide by zero" errors. */ if (nk[k] == 0) continue; for (j = 0; j < *lly; j++) { for (i = 0; i < *llx; i++) { if (n[i][j][k] > 0) *res += n[i][j][k] * log( (n[i][j][k] * nk[k]) / (ni[i][k] * nj[j][k]) ); }/*FOR*/ }/*FOR*/ }/*FOR*/ UNPROTECT(1); return result; }/*SHCMI*/
/* Shrinked Covariance Matrix. */ SEXP cov_lambda(SEXP data, SEXP length) { int i = 0, j = 0, k = 0, cur = 0; int *n = INTEGER(length), ncols = LENGTH(data); double *mean = NULL, *var = NULL, **column = NULL; double lambda = 0, sumcors = 0, sumvars = 0; SEXP res; /* allocate the covariance matrix. */ PROTECT(res = allocMatrix(REALSXP, ncols, ncols)); var = REAL(res); memset(var, '\0', ncols * ncols * sizeof(double)); /* allocate an array to store the mean values. */ mean = alloc1dreal(ncols); /* allocate and initialize an array of pointers for the variables. */ column = (double **) alloc1dpointer(ncols); for (i = 0; i < ncols; i++) column[i] = REAL(VECTOR_ELT(data, i)); /* compute the mean values */ for (i = 0; i < ncols; i++) { for (j = 0 ; j < *n; j++) mean[i] += column[i][j]; mean[i] /= (*n); }/*FOR*/ for (i = 0; i < ncols; i++) { for (j = i; j < ncols; j++) { cur = CMC(i, j, ncols); /* compute the actual variance/covariance. */ for (k = 0; k < *n; k++) var[cur] += (column[i][k] - mean[i]) * (column[j][k] - mean[j]); if (i != j) { /* do the first round of computations for the shrinkage intensity. */ for (k = 0; k < *n; k++) { sumvars += ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n)) * ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n)); }/*FOR*/ sumcors += (var[cur] / (*n - 1)) * (var[cur] / (*n - 1)); }/*THEN*/ /* use the unbiased estimator for variances/covariances. */ var[cur] /= (*n) - 1; /* fill in the symmetric element of the matrix. */ var[CMC(j, i, ncols)] = var[cur]; }/*FOR*/ }/*FOR*/ /* wrap up the computation of the shrinkage intensity. */ lambda = sumvars * (*n) / (*n - 1) / (*n -1) / (*n -1) / sumcors; /* truncate the shrinkage intensity in the [0,1] interval; this is not an * error, but a measure to increase the quality of the shrinked estimate. */ if (lambda > 1) { lambda = 1; }/*THEN*/ else if (lambda < 0) { lambda = 0; }/*THEN*/ /* shrink the covariance matrix (except the diagonal, which stays the same). */ for (i = 0; i < ncols; i++) for (j = 0; j < ncols; j++) if (i != j) var[CMC(i, j, ncols)] *= 1 - lambda; UNPROTECT(1); return res; }/*COV_LAMBDA*/
/* predict the values of one or more variables given one or more variables by * maximum a posteriori (MAP). */ SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) { int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0; int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug); void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL; SEXP result, colnames, evidence, evmatch, temp = R_NilValue; SEXP cpdist, predicted, lvls = R_NilValue; double *wgt = NULL; long double *lvls_counts = NULL; /* extract the names of the variables in the data. */ colnames = getAttrib(data, R_NamesSymbol); /* remove the name of the variable to predict. */ nev = length(from); PROTECT(evmatch = match(colnames, from, 0)); /* cache variable types and pointers. */ vartypes = alloc1dcont(nev); varptrs = alloc1dpointer(nev); for (j = 0, k = 0; j < nev; j++) { temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1); vartypes[k] = TYPEOF(temp); varptrs[k++] = DATAPTR(temp); }/*FOR*/ /* cache the sample size. */ nobs = length(temp); /* allocate a list to hold the evidence. */ PROTECT(evidence = allocVector(VECSXP, nev)); setAttrib(evidence, R_NamesSymbol, from); /* cache pointers to the elements of the evidence .*/ evptrs = alloc1dpointer(nev); for (j = 0; j < nev; j++) { PROTECT(temp = allocVector(vartypes[j], 1)); evptrs[j] = DATAPTR(temp); SET_VECTOR_ELT(evidence, j, temp); UNPROTECT(1); }/*FOR*/ /* make the evidence a data frame to compact debugging output. */ minimal_data_frame(evidence); /* allocate the return value. */ PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs)); res = DATAPTR(result); /* in the case of discrete variables, allocate scratch space for levels' * frequencies. */ if (TYPEOF(result) == INTSXP) { lvls = getAttrib(result, R_LevelsSymbol); nlvls = length(lvls); lvls_counts = allocldouble(nlvls); }/*THEN*/ /* allocate the weights. */ wgt = alloc1dreal(nsims); /* allocate sratch space for the random samplings. */ PROTECT(cpdist = fit2df(fitted, nsims)); predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0))); pred = DATAPTR(predicted); /* iterate over the observations. */ for (i = 0; i < nobs; i++) { /* copy the values into the list. */ for (j = 0; j < nev; j++) { switch(vartypes[j]) { case REALSXP: *((double *)evptrs[j]) = ((double *)varptrs[j])[i]; break; case INTSXP: *((int *)evptrs[j]) = ((int *)varptrs[j])[i]; break; }/*SWITCH*/ }/*FOR*/ if (debuglevel > 0) { Rprintf("* predicting observation %d conditional on:\n", i); PrintValue(evidence); }/*THEN*/ /* generate samples from the conditional posterior distribution. */ c_rbn_master(fitted, cpdist, n, evidence, FALSE); /* compute the weights. */ c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE); /* compute the posterior estimate. */ switch(TYPEOF(predicted)) { case REALSXP: /* average the predicted values. */ ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims, debuglevel); break; case INTSXP: /* pick the most frequent value. */ ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts, lvls, nlvls, debuglevel); break; }/*SWITCH*/ }/*FOR*/ UNPROTECT(4); return result; }/*MAPPRED*/
/* Chow-Liu structure learning algorithm. */ SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP conditional, SEXP debug) { int i = 0, j = 0, k = 0, debug_coord[2], ncols = LENGTH(data); int num = LENGTH(VECTOR_ELT(data, i)), narcs = 0, nwl = 0, nbl = 0; int *nlevels = NULL, *clevels = NULL, *est = INTEGER(estimator); int *wl = NULL, *bl = NULL, *poset = NULL, *debuglevel = LOGICAL(debug); void **columns = NULL, *cond = NULL; short int *include = NULL; double *mim = NULL; SEXP arcs, wlist, blist; /* dereference the columns of the data frame. */ DEREFERENCE_DATA_FRAME() /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */ if (conditional != R_NilValue) { cond = (void *) INTEGER(conditional); clevels = alloc1dcont(1); *clevels = NLEVELS(conditional); }/*THEN*/ /* allocate the mutual information matrix and the status vector. */ mim = alloc1dreal(UPTRI3_MATRIX(ncols)); include = allocstatus(UPTRI3_MATRIX(ncols)); /* compute the pairwise mutual information coefficients. */ if (*debuglevel > 0) Rprintf("* computing pairwise mutual information coefficients.\n"); mi_matrix(mim, columns, ncols, nlevels, &num, cond, clevels, est); LIST_MUTUAL_INFORMATION_COEFS() /* add whitelisted arcs first. */ if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) { PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE)); wl = INTEGER(wlist); nwl = LENGTH(wlist); for (i = 0; i < nwl; i++) { if (*debuglevel > 0) { Rprintf("* adding whitelisted arcs first.\n"); if (include[wl[i]] == 0) { Rprintf(" > arc %s - %s has been added to the graph.\n", CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl))); }/*THEN*/ else { Rprintf(" > arc %s - %s was already present in the graph.\n", CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl))); }/*ELSE*/ }/*THEN*/ /* update the counter if need be. */ if (include[wl[i]] == 0) narcs++; /* include the arc in the graph. */ include[wl[i]] = 1; }/*FOR*/ UNPROTECT(1); }/*THEN*/ /* cache blacklisted arcs. */ if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) { PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE)); bl = INTEGER(blist); nbl = LENGTH(blist); }/*THEN*/ /* sort the mutual information coefficients and keep track of the elements' index. */ poset = alloc1dcont(UPTRI3_MATRIX(ncols)); for (i = 0; i < UPTRI3_MATRIX(ncols); i++) poset[i] = i; R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncols)); for (i = UPTRI3_MATRIX(ncols) - 1; i > 0; i--) { /* get back the coordinates from the position in the half-matrix. */ INV_UPTRI3(poset[i], ncols, debug_coord); /* already included all the arcs we had to, exiting. */ if (narcs >= ncols - 1) break; /* arc already present in the graph, nothing to do. */ if (include[poset[i]] == 1) continue; if (bl) { if (chow_liu_blacklist(bl, &nbl, poset + i)) { if (*debuglevel > 0) { Rprintf("* arc %s - %s is blacklisted, skipping.\n", NODE(debug_coord[0]), NODE(debug_coord[1])); }/*THEN*/ continue; }/*THEN*/ }/*THEN*/ if (c_uptri3_path(include, debug_coord[0], debug_coord[1], ncols, nodes, FALSE)) { if (*debuglevel > 0) { Rprintf("* arc %s - %s introduces cycles, skipping.\n", NODE(debug_coord[0]), NODE(debug_coord[1])); }/*THEN*/ continue; }/*THEN*/ if (*debuglevel > 0) { Rprintf("* adding arc %s - %s with mutual information %lf.\n", NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]); }/*THEN*/ /* include the arc in the graph. */ include[poset[i]] = 1; /* update the counter. */ narcs++; }/*FOR*/ if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) UNPROTECT(1); /* sanity check for blacklist-related madnes. */ if (narcs != ncols - 1) error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.", narcs, ncols - 1); CONVERT_TO_ARC_SET(include, 0, 2 * (ncols - 1)); return arcs; }/*CHOW_LIU*/
/* ARACNE structure learning algorithm. */ SEXP aracne(SEXP data, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP debug) { int i = 0, j = 0, k = 0, coord = 0, ncols = LENGTH(data); int num = LENGTH(VECTOR_ELT(data, i)), narcs = ncols * (ncols - 1) / 2; int *nlevels = NULL, *est = INTEGER(estimator), *wl = NULL, *bl = NULL; int *debuglevel = LOGICAL(debug); void **columns = NULL; short int *exclude = NULL; double *mim = NULL; SEXP arcs, nodes, wlist, blist; nodes = getAttrib(data, R_NamesSymbol); /* dereference the columns of the data frame. */ DEREFERENCE_DATA_FRAME() /* allocate the mutual information matrix and the status vector. */ mim = alloc1dreal(UPTRI3_MATRIX(ncols)); exclude = allocstatus(UPTRI3_MATRIX(ncols)); /* compute the pairwise mutual information coefficients. */ if (*debuglevel > 0) Rprintf("* computing pairwise mutual information coefficients.\n"); mi_matrix(mim, columns, ncols, nlevels, &num, NULL, NULL, est); LIST_MUTUAL_INFORMATION_COEFS() /* compare all the triplets. */ for (i = 0; i < ncols; i++) { for (j = i + 1; j < ncols; j++) { for (k = 0; k < ncols; k++) { if ((k == i) || (k == j)) continue; /* cache the UPTRI3 coordinates of the arc. */ coord = UPTRI3(i + 1, j + 1, ncols); /* if MI(X, Y) < min(MI(X, Z), MI(Z, Y)) drop arc X - Y. */ if ((mim[coord] < mim[UPTRI3(i + 1, k + 1, ncols)]) && (mim[coord] < mim[UPTRI3(j + 1, k + 1, ncols)])) { if (*debuglevel > 0) { Rprintf("* dropping arc %s - %s because of %s, %lf < min(%lf, %lf)\n", NODE(i), NODE(j), NODE(k), mim[UPTRI3(i + 1, j + 1, ncols)], mim[UPTRI3(i + 1, k + 1, ncols)], mim[UPTRI3(j + 1, k + 1, ncols)]); }/*THEN*/ /* update the status vector. */ exclude[coord] = 1; /* decrement the number of arcs. */ narcs--; break; }/*THEN*/ }/*FOR*/ }/*FOR*/ }/*FOR*/ /* add back whitelisted arcs. */ if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) { PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE)); wl = INTEGER(wlist); for (i = 0; i < LENGTH(wlist); i++) { if (*debuglevel > 0) { Rprintf("* adding back whitelisted arcs.\n"); if (exclude[wl[i]] == 1) { Rprintf(" > arc %s - %s has been added to the graph.\n", CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist)))); }/*THEN*/ else { Rprintf(" > arc %s - %s was already present in the graph.\n", CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist)))); }/*ELSE*/ }/*THEN*/ /* update the counter if need be. */ if (exclude[wl[i]] == 1) narcs++; /* include the arc in the graph. */ exclude[wl[i]] = 0; }/*FOR*/ UNPROTECT(1); }/*THEN*/ /* remove blacklisted arcs. */ if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) { PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE)); bl = INTEGER(blist); for (i = 0; i < LENGTH(blist); i++) { if (*debuglevel > 0) { Rprintf("* removing blacklisted arcs.\n"); if (exclude[bl[i]] == 0) { Rprintf(" > arc %s - %s has been dropped from the graph.\n", CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist)))); }/*THEN*/ else { Rprintf(" > arc %s - %s was not present in the graph.\n", CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist)))); }/*ELSE*/ }/*THEN*/ /* update the counter if need be. */ if (exclude[bl[i]] == 0) narcs--; /* remove the arc from the graph. */ exclude[bl[i]] = 1; }/*FOR*/ UNPROTECT(1); }/*THEN*/ CONVERT_TO_ARC_SET(exclude, 1, 2 * narcs); return arcs; }/*ARACNE*/
/* conditional Monte Carlo simulation for correlation-based tests. */ SEXP gauss_cmcarlo(SEXP data, SEXP length, SEXP samples, SEXP test, SEXP alpha) { int j = 0, k = 0, ncols = LENGTH(data), errcode = 0, *work = NULL, *perm = NULL; int error_counter = 0, *B = INTEGER(samples), *num = INTEGER(length); double observed = 0, permuted = 0, *yperm = NULL, *yorig = NULL, *res = NULL; double enough = ceil(NUM(alpha) * (*B)) + 1; double **column = NULL, *mean = NULL, *covariance = NULL, *covariance_backup = NULL; double *u = NULL, *d = NULL, *vt = NULL; SEXP result; /* allocate the matrices needed for the SVD decomposition. */ u = alloc1dreal(ncols * ncols); d = alloc1dreal(ncols); vt = alloc1dreal(ncols * ncols); /* allocate and initialize the result. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* allocate and initialize an array of pointers for the variables. */ column = (double **) alloc1dpointer(ncols); for (j = 0; j < ncols; j++) column[j] = REAL(VECTOR_ELT(data, j)); /* cache the means of the variables (they are invariant under permutation). */ mean = alloc1dreal(ncols); /* compute the mean values */ for (j = 0; j < ncols; j++) { for (k = 0 ; k < *num; k++) mean[j] += column[j][k]; mean[j] /= (*num); }/*FOR*/ /* allocate and initialize the covariance matrix. */ covariance = alloc1dreal(ncols * ncols); covariance_backup = alloc1dreal(ncols * ncols); c_covmat(column, mean, &ncols, num, covariance); memcpy(covariance_backup, covariance, ncols * ncols * sizeof(double)); /* substitute the original data with the fake column that will be permuted. */ yperm = alloc1dreal(*num); yorig = column[1]; memcpy(yperm, yorig, *num * sizeof(double)); column[1] = yperm; /* allocate the arrays needed by RandomPermutation. */ perm = alloc1dcont(*num); work = alloc1dcont(*num); /* initialize the random number generator. */ GetRNGstate(); /* pick up the observed value of the test statistic, then generate a set of random permutations (all variable but the second are fixed) and check how many tests are greater (in absolute value) than the original one.*/ switch(INT(test)) { case GAUSSIAN_MUTUAL_INFORMATION: case LINEAR_CORRELATION: case FISHER_Z: observed = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode); if (errcode) error("an error (%d) occurred in the call to dgesvd().\n", errcode); for (j = 0; j < (*B); j++) { /* reset the error flag of the SVD Fortran routine. */ errcode = 0; RandomPermutation(*num, perm, work); for (k = 0; k < *num; k++) yperm[k] = yorig[perm[k]]; /* restore the covariance matrix from the good copy. */ memcpy(covariance, covariance_backup, ncols * ncols * sizeof(double)); /* update the relevant covariances. */ c_update_covmat(column, mean, 1, &ncols, num, covariance); permuted = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode); if (errcode != 0) error_counter++; if (fabs(permuted) > fabs(observed)) { sequential_counter_check(*res); }/*THEN*/ }/*FOR*/ if (error_counter > 0) warning("unable to compute %d permutations due to errors in dgesvd().\n", error_counter); break; }/*SWITCH*/ PutRNGstate(); /* save the observed p-value. */ *res /= *B; UNPROTECT(1); return result; }/*GAUSS_CMCARLO*/
/* unconditional Monte Carlo simulation for correlation-based tests. */ SEXP gauss_mcarlo(SEXP x, SEXP y, SEXP samples, SEXP test, SEXP alpha) { int j = 0, k = 0, num = LENGTH(x), *B = INTEGER(samples); double *xx = REAL(x), *yy = REAL(y), *yperm = NULL, *res = NULL; double observed = 0, enough = ceil(NUM(alpha) * (*B)) + 1, xm = 0, ym = 0; int *perm = NULL, *work = NULL; SEXP result; /* allocate the arrays needed by RandomPermutation. */ perm = alloc1dcont(num); work = alloc1dcont(num); /* allocate the array for the pemutations. */ yperm = alloc1dreal(num); /* cache the means of the two variables (they are invariant under permutation). */ for (j = 0; j < num; j++) { xm += xx[j]; ym += yy[j]; }/*FOR*/ xm /= num; ym /= num; /* allocate the result. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* initialize the random number generator. */ GetRNGstate(); /* pick up the observed value of the test statistic, then generate a set of random permutations (all variable but the second are fixed) and check how many tests are greater (in absolute value) than the original one.*/ switch(INT(test)) { case GAUSSIAN_MUTUAL_INFORMATION: case LINEAR_CORRELATION: case FISHER_Z: observed = _cov(xx, yy, &xm, &ym, &num); for (j = 0; j < *B; j++) { RandomPermutation(num, perm, work); for (k = 0; k < num; k++) yperm[k] = yy[perm[k]]; if (fabs(_cov(xx, yperm, &xm, &ym, &num)) > fabs(observed)) { sequential_counter_check(*res); }/*THEN*/ }/*FOR*/ break; }/*SWITCH*/ PutRNGstate(); /* save the observed p-value. */ *res /= *B; UNPROTECT(1); return result; }/*GAUSS_MCARLO*/
SEXP cwpost(SEXP x, SEXP z, SEXP imaginary, SEXP phi_coef) { int i = 0, j = 0, k = 0; int ncols = LENGTH(z), num = LENGTH(x), tau_ncols = LENGTH(z) + 1; int *iss = INTEGER(imaginary), rho = *iss + ncols; double logscale = 0, logk = 0, xprod = 0, var_x = 0, zi_mu = 0, phi = 0; double *xx = REAL(x), *phic = REAL(phi_coef), *workspace = NULL; double *res = NULL, **zz = NULL, *zi = NULL, *mu = NULL, *delta_mu = NULL; double *tau = NULL, *invtau = NULL, *old_tau = NULL, *old_mu = NULL; SEXP result; /* allocate a workspace vector. */ workspace = alloc1dreal(tau_ncols); /* allocate and initialize the parent configuration. */ zi = alloc1dreal(ncols + 1); zi[0] = 1; /* estimate mu and var_x. */ mu = alloc1dreal(tau_ncols); old_mu = alloc1dreal(tau_ncols); delta_mu = alloc1dreal(tau_ncols); for (i = 0; i < num; i++) mu[0] += xx[i]; mu[0] /= num; for (i = 0; i < num; i++) var_x += (xx[i] - mu[0]) * (xx[i] - mu[0]); var_x /= num - 1; /* initialize phi. */ phi = var_x * (*phic); /* allocate and initialize an array of pointers for the variables. */ zz = (double **) alloc1dpointer(ncols); for (j = 0; j < ncols; j++) zz[j] = REAL(VECTOR_ELT(z, j)); /* allocate and initialize tau. */ tau = alloc1dreal(tau_ncols * tau_ncols); old_tau = alloc1dreal(tau_ncols * tau_ncols); invtau = alloc1dreal(tau_ncols * tau_ncols); build_tau(zz, tau, &ncols, &num, iss, phic); memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double)); c_ginv(tau, &tau_ncols, invtau); /* allocate and initialize result to zero. */ PROTECT(result = allocVector(REALSXP, 1)); res = REAL(result); *res = 0; /* for each sample... */ for (i = 0; i < num; i++) { /* ... extract the values of the parents ... */ for (j = 0; j < ncols; j++) zi[j + 1] = zz[j][i]; /* ... compute the Mahalanobis distance of z[i] ... */ xprod = c_quadratic(zi, &tau_ncols, invtau, zi, workspace); /* ... compute the scale factor ... */ logscale = log(phi) + log1p(xprod); logk = lgammafn(0.5 * (1 + rho)) - lgammafn(0.5 * rho); logk -= 0.5 * (logscale + log(M_PI)); /* and then the score for the variable. */ for (j = 0, zi_mu = 0; j < tau_ncols; j++) zi_mu += zi[j] * mu[j]; *res += logk - 0.5 * (1 + rho) * log1p((xx[i] - zi_mu) * (xx[i] - zi_mu) / exp(logscale)); /* For the next iteration, update the tau matrix ... */ memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double)); for (j = 0; j < tau_ncols; j++) for (k = j; k < tau_ncols; k++) tau[CMC(j, k, tau_ncols)] = tau[CMC(k, j, tau_ncols)] = tau[CMC(j, k, tau_ncols)] + zi[j] * zi[k]; /* ... its inverse ... */ c_finv(tau, &tau_ncols, invtau); /* ... update the mu vector ... */ memcpy(old_mu, mu, tau_ncols * sizeof(double)); c_rotate(invtau, old_tau, mu, &(xx[i]), zi, &tau_ncols, workspace); /* ... update rho (ISS + sample size evaluated at the current iteration) ... */ rho++; /* ... and update phi. */ for (j = 0; j < tau_ncols; j++) delta_mu[j] = old_mu[j] - mu[j]; for (j = 0, zi_mu = 0; j < tau_ncols; j++) zi_mu += zi[j] * mu[j]; phi += (xx[i] - zi_mu) * xx[i] + c_quadratic(delta_mu, &tau_ncols, old_tau, old_mu, workspace); }/*FOR*/ UNPROTECT(1); return result; }/*CWPOST*/
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents, SEXP maxp, SEXP nodes, SEXP convert) { int i = 0, j = 0, narcs = 0, dims = length(nodes); int *a = NULL, *coords = NULL; double *mp = REAL(maxp), *np = NULL; short int duplicated = 0; SEXP try, result = R_NilValue, result2; /* transform the arc set into an adjacency matrix, if it's not one already. */ if (isInteger(arcs)) { if ((duplicated = NAMED(arcs)) > 0) PROTECT(result = duplicate(arcs)); }/*THEN*/ else { PROTECT(result = arcs2amat(arcs, nodes)); }/*ELSE*/ /* dereference the adjacency matrix once and for all. */ a = INTEGER(result); /* compute the number the parents of each node, unless provided. */ if (nparents == R_NilValue) { np = alloc1dreal(dims); for (i = 0; i < dims; i++) for (j = 0; j < dims; j++) np[j] = a[CMC(i, j, dims)]; }/*THEN*/ else { np = REAL(nparents); }/*ELSE*/ /* flip all the nondiagonal cells. */ for (j = 0; j < dims; j++) { for (i = 0; i < dims; i++) { /* diagonal elements are always equal to zero, skip them. */ if (i == j) continue; a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)]; }/*FOR*/ }/*FOR*/ /* if an arc is present in the graph in one direction, you cannot add it in * the other direction (it would be a reversal); flip both in the adjacency * matrix. */ for (j = 0; j < dims; j++) for (i = j + 1; i < dims; i++) a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)]; /* if a node has already reached its maximum number parents, do not add * more arcs pointing to that node. */ for (j = 0; j < dims; j++) if (np[j] >= *mp) memset(a + j * dims, '\0', dims * sizeof(int)); #define FLIP_FROM_LIST(list, value) \ if (!isNull(list)) { \ if (!isInteger(list)) { \ PROTECT(try = match(nodes, list, 0)); \ coords = INTEGER(try); \ narcs = length(try)/2; \ for (i = 0; i < narcs; i++) \ a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \ UNPROTECT(1); \ }/*THEN*/ \ else { \ coords = INTEGER(list); \ for (i = 0; i < dims * dims; i ++) \ if (coords[i] == 1) \ a[i] = value; \ }/*ELSE*/ \ }/*THEN*/ /* now the blacklist gets involved. */ FLIP_FROM_LIST(blacklist, 0); /* and, last but not least, the whitelist gets involved. */ FLIP_FROM_LIST(whitelist, 1); /* return either the adjacency matrix or the arc set. */ if (isTRUE(convert)) { PROTECT(result2 = amat2arcs(result, nodes)); if ((duplicated > 0) || !isInteger(arcs)) UNPROTECT(2); else UNPROTECT(1); return result2; }/*THEN*/ else { if ((duplicated > 0) || !isInteger(arcs)) UNPROTECT(1); return result; }/*ELSE*/ }/*HC_TO_BE_ADDED*/