Beispiel #1
0
/* predict the values of one or more variables given one or more variables by
 * maximum a posteriori (MAP). */
SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) {

int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0;
int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug);
void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL;
SEXP result, colnames, evidence, evmatch, temp = R_NilValue;
SEXP cpdist, predicted, lvls = R_NilValue;
double *wgt = NULL;
long double *lvls_counts = NULL;

  /* extract the names of the variables in the data. */
  colnames = getAttrib(data, R_NamesSymbol);

  /* remove the name of the variable to predict. */
  nev = length(from);
  PROTECT(evmatch = match(colnames, from, 0));

  /* cache variable types and pointers. */
  vartypes = alloc1dcont(nev);
  varptrs = alloc1dpointer(nev);
  for (j = 0, k = 0; j < nev; j++) {

    temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1);
    vartypes[k] = TYPEOF(temp);
    varptrs[k++] = DATAPTR(temp);

  }/*FOR*/

  /* cache the sample size. */
  nobs = length(temp);

  /* allocate a list to hold the evidence. */
  PROTECT(evidence = allocVector(VECSXP, nev));
  setAttrib(evidence, R_NamesSymbol, from);

  /* cache pointers to the elements of the evidence .*/
  evptrs = alloc1dpointer(nev);

  for (j = 0; j < nev; j++) {

    PROTECT(temp = allocVector(vartypes[j], 1));
    evptrs[j] = DATAPTR(temp);
    SET_VECTOR_ELT(evidence, j, temp);
    UNPROTECT(1);

  }/*FOR*/

  /* make the evidence a data frame to compact debugging output. */
  minimal_data_frame(evidence);

  /* allocate the return value. */
  PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs));
  res = DATAPTR(result);

  /* in the case of discrete variables, allocate scratch space for levels'
   * frequencies. */
  if (TYPEOF(result) == INTSXP) {

    lvls = getAttrib(result, R_LevelsSymbol);
    nlvls = length(lvls);
    lvls_counts = allocldouble(nlvls);

  }/*THEN*/

  /* allocate the weights. */
  wgt = alloc1dreal(nsims);

  /* allocate sratch space for the random samplings. */
  PROTECT(cpdist = fit2df(fitted, nsims));
  predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0)));
  pred = DATAPTR(predicted);

  /* iterate over the observations. */
  for (i = 0; i < nobs; i++) {

    /* copy the values into the list. */
    for (j = 0; j < nev; j++) {

      switch(vartypes[j]) {

        case REALSXP:

          *((double *)evptrs[j]) = ((double *)varptrs[j])[i];
          break;

        case INTSXP:

          *((int *)evptrs[j]) = ((int *)varptrs[j])[i];
          break;

      }/*SWITCH*/

    }/*FOR*/

    if (debuglevel > 0) {

      Rprintf("* predicting observation %d conditional on:\n", i);
      PrintValue(evidence);

    }/*THEN*/

    /* generate samples from the conditional posterior distribution. */
    c_rbn_master(fitted, cpdist, n, evidence, FALSE);
    /* compute the weights. */
    c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE);

    /* compute the posterior estimate. */
    switch(TYPEOF(predicted)) {

      case REALSXP:

        /* average the predicted values. */
        ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims,
                               debuglevel);
        break;

      case INTSXP:

        /* pick the most frequent value. */
        ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts,
                            lvls, nlvls, debuglevel);
        break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(4);

  return result;

}/*MAPPRED*/
Beispiel #2
0
double castelo_prior(SEXP beta, SEXP target, SEXP parents, SEXP children,
                     SEXP debug) {

    int i = 0, k = 0, t = 0, nnodes = 0, cur_arc = 0;
    int nbeta = LENGTH(VECTOR_ELT(beta, 0));
    int *temp = NULL, *debuglevel = LOGICAL(debug), *aid = INTEGER(VECTOR_ELT(beta, 2));
    double prior = 0, result = 0;
    double *bkwd = REAL(VECTOR_ELT(beta, 4)), *fwd = REAL(VECTOR_ELT(beta, 3));
    short int *adjacent = NULL;
    SEXP nodes, try;

    /* get the node labels. */
    nodes = getAttrib(beta, install("nodes"));
    nnodes = LENGTH(nodes);

    /* match the target node. */
    PROTECT(try = match(nodes, target, 0));
    t = INT(try);
    UNPROTECT(1);

    /* find out which nodes are parents and which nodes are children. */
    adjacent = allocstatus(nnodes);

    PROTECT(try = match(nodes, parents, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = PARENT;
    UNPROTECT(1);

    PROTECT(try = match(nodes, children, 0));
    temp = INTEGER(try);
    for (i = 0; i < LENGTH(try); i++)
        adjacent[temp[i] - 1] = CHILD;
    UNPROTECT(1);

    /* prior probabilities table lookup. */
    for (i = t + 1; i <= nnodes; i++) {

        /* compute the arc id. */
        cur_arc = UPTRI3(t, i, nnodes);

        /* look up the prior probability. */
        for (/*k,*/ prior = ((double)1/3); k < nbeta; k++) {

            /* arcs are ordered, so we can stop early in the lookup. */
            if (aid[k] > cur_arc)
                break;

            if (aid[k] == cur_arc) {

                switch(adjacent[i - 1]) {

                case PARENT:
                    prior = bkwd[k];
                    break;
                case CHILD:
                    prior = fwd[k];
                    break;
                default:
                    prior = 1 - bkwd[k] - fwd[k];

                }/*SWITCH*/

                break;

            }/*THEN*/

        }/*FOR*/

        if (*debuglevel > 0) {

            switch(adjacent[i - 1]) {

            case PARENT:
                Rprintf("  > found arc %s -> %s, prior pobability is %lf.\n",
                        NODE(i - 1), NODE(t - 1), prior);
                break;
            case CHILD:
                Rprintf("  > found arc %s -> %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);
                break;
            default:
                Rprintf("  > no arc between %s and %s, prior probability is %lf.\n",
                        NODE(t - 1), NODE(i - 1), prior);

            }/*SWITCH*/

        }/*THEN*/

        /* move to log-scale and divide by the non-informative log(1/3), so that
         * the contribution of each arc whose prior has not been not specified by
         * the user is zero; overflow is likely otherwise. */
        result += log(prior / ((double)1/3));

    }/*FOR*/

    return result;

}/*CASTELO_PRIOR*/

/* complete a prior as per Castelo & Siebes. */
SEXP castelo_completion(SEXP prior, SEXP nodes)  {

    int i = 0, k = 0, cur = 0, narcs1 = 0, narcs2 = 0, nnodes = LENGTH(nodes);
    int *m1 = NULL, *m2 = NULL, *und = NULL, *aid = NULL, *poset = NULL, *id = NULL;
    double *d1 = NULL, *d2 = NULL, *p = NULL;
    SEXP df, arc_id, undirected, a1, a2, match1, match2, prob;
    SEXP result, colnames, from, to, nid, dir1, dir2;

    /* compute numeric IDs for the arcs. */
    a1 = VECTOR_ELT(prior, 0);
    a2 = VECTOR_ELT(prior, 1);
    narcs1 = LENGTH(a1);
    PROTECT(match1 = match(nodes, a1, 0));
    PROTECT(match2 = match(nodes, a2, 0));
    m1 = INTEGER(match1);
    m2 = INTEGER(match2);
    PROTECT(arc_id = allocVector(INTSXP, narcs1));
    aid = INTEGER(arc_id);

    c_arc_hash(&narcs1, &nnodes, m1, m2, aid, NULL, TRUE);

    /* duplicates correspond to undirected arcs. */
    PROTECT(undirected = dupe(arc_id));
    und = INTEGER(undirected);

    /* extract the components from the prior. */
    prob = VECTOR_ELT(prior, 2);
    p = REAL(prob);

    /* count output arcs. */
    for (i = 0; i < narcs1; i++)
        narcs2 += 2 - und[i];
    narcs2 /= 2;

    /* allocate the columns of the return value. */
    PROTECT(from = allocVector(STRSXP, narcs2));
    PROTECT(to = allocVector(STRSXP, narcs2));
    PROTECT(nid = allocVector(INTSXP, narcs2));
    id = INTEGER(nid);
    PROTECT(dir1 = allocVector(REALSXP, narcs2));
    d1 = REAL(dir1);
    PROTECT(dir2 = allocVector(REALSXP, narcs2));
    d2 = REAL(dir2);

    /* sort the strength coefficients. */
    poset = alloc1dcont(narcs1);
    for (k = 0; k < narcs1; k++)
        poset[k] = k;
    R_qsort_int_I(aid, poset, 1, narcs1);

    for (i = 0, k = 0; i < narcs1; i++) {

        cur = poset[i];

#define ASSIGN(A1, A2, D1, D2) \
  SET_STRING_ELT(from,  k, STRING_ELT(A1, cur)); \
  SET_STRING_ELT(to,  k, STRING_ELT(A2, cur)); \
  id[k] = aid[i]; \
  D1[k] = p[cur]; \
  if ((und[cur] == TRUE) && (i < narcs1 - 1)) \
    D2[k] = p[poset[++i]]; \
  else \
    D2[k] = (1 - D1[k])/2;

        /* copy the node labels. */
        if (m1[cur] < m2[cur]) {

            ASSIGN(a1, a2, d1, d2);

        }/*THEN*/
        else {

            ASSIGN(a2, a1, d2, d1);

        }/*ELSE*/

        if (d1[k] + d2[k] > 1) {

            UNPROTECT(9);

            error("the probabilities for arc %s -> %s sum to %lf.",
                  CHAR(STRING_ELT(from, k)), CHAR(STRING_ELT(to, k)), d1[k] + d2[k]);

        }/*THEN*/

        /* move to the next arc. */
        k++;

    }/*FOR*/

    /* set up the return value. */
    PROTECT(result = allocVector(VECSXP, 5));
    SET_VECTOR_ELT(result, 0, from);
    SET_VECTOR_ELT(result, 1, to);
    SET_VECTOR_ELT(result, 2, nid);
    SET_VECTOR_ELT(result, 3, dir1);
    SET_VECTOR_ELT(result, 4, dir2);
    PROTECT(colnames = allocVector(STRSXP, 5));
    SET_STRING_ELT(colnames, 0, mkChar("from"));
    SET_STRING_ELT(colnames, 1, mkChar("to"));
    SET_STRING_ELT(colnames, 2, mkChar("aid"));
    SET_STRING_ELT(colnames, 3, mkChar("fwd"));
    SET_STRING_ELT(colnames, 4, mkChar("bkwd"));
    setAttrib(result, R_NamesSymbol, colnames);
    PROTECT(df = minimal_data_frame(result));

    UNPROTECT(12);

    return df;

}/*CASTELO_COMPLETION*/
/* mean strength for p-values and score deltas. */
static void mean_strength_overall(SEXP *mean_df, SEXP strength, SEXP nodes,
  int nrows, int nstr, SEXP ref_hash, double *w) {

int i = 0, j = 0, *t = NULL;
double *mstr = NULL, *cur_strength = NULL;
long double cumw = 0;
SEXP mean_str, cur, cur_hash, try;

  /* allocate the strength accumulator vector. */
  PROTECT(mean_str = allocVector(REALSXP, nrows));
  SET_VECTOR_ELT(*mean_df, 2, mean_str);
  mstr = REAL(mean_str);
  memset(mstr, '\0', nrows * sizeof(double));

  for (i = 0; i < nstr; i++) {

    /* move to the next object. */
    cur = VECTOR_ELT(strength, i);
    /* get the strength values from the bn.strength object. */
    cur_strength = REAL(VECTOR_ELT(cur, 2));
    /* get the arc IDs to use to correctly match strengths. */
    PROTECT(cur_hash = arc_hash(cur, nodes, FALSE, FALSE));

    /* match the current arc IDs to the reference arc IDs. */
    PROTECT(try = match(ref_hash, cur_hash, 0));
    t = INTEGER(try);

    for (j = 0; j < nrows; j++)
      mstr[t[j] - 1] += w[i] * cur_strength[j];

    /* update the total weight mass. */
    cumw += w[i];

    UNPROTECT(2);

  }/*FOR*/

  /* rescale by the total weight mass. */
  for (j = 0; j < nrows; j++)
    mstr[j] /= cumw;

  UNPROTECT(1);

}/*MEAN_STRENGTH_OVERALL*/

/* mean strength for bootstrap probabilities. */
static void mean_strength_direction(SEXP *mean_df, SEXP strength, SEXP nodes,
  int nrows, int nstr, SEXP ref_hash, double *w) {

int i = 0, j = 0, *t = NULL, nnodes = length(nodes);
double *mstr = NULL, *mdir = NULL, *cur_strength = NULL, *cur_dir = NULL;
double fwd = 0, bkwd = 0;
long double cumw = 0;
SEXP mean_str, mean_dir, cur, cur_hash, try;


  /* allocate vectors for strength and direction. */
  PROTECT(mean_str = allocVector(REALSXP, nrows));
  SET_VECTOR_ELT(*mean_df, 2, mean_str);
  mstr = REAL(mean_str);
  memset(mstr, '\0', nrows * sizeof(double));
  PROTECT(mean_dir = allocVector(REALSXP, nrows));
  SET_VECTOR_ELT(*mean_df, 3, mean_dir);
  mdir = REAL(mean_dir);
  memset(mdir, '\0', nrows * sizeof(double));

  for (i = 0; i < nstr; i++) {

    /* move to the next object. */
    cur = VECTOR_ELT(strength, i);
    /* get the strength and direction values from the bn.strength object. */
    cur_strength = REAL(VECTOR_ELT(cur, 2));
    cur_dir = REAL(VECTOR_ELT(cur, 3));
    /* get the arc IDs to use to correctly match strengths. */
    PROTECT(cur_hash = arc_hash(cur, nodes, FALSE, FALSE));

    /* match the current arc IDs to the reference arc IDs. */
    PROTECT(try = match(ref_hash, cur_hash, 0));
    t = INTEGER(try);

    for (j = 0; j < nrows; j++)
      mstr[t[j] - 1] += w[i] * (cur_strength[j] * cur_dir[j]);

    /* update the total weight mass. */
    cumw += w[i];

    UNPROTECT(2);

  }/*FOR*/

  /* rescale by the total weight mass. */
  for (j = 0; j < nrows; j++)
    mstr[j] /= cumw;

  /* split arc strength from direction strength. */
  for (i = 0; i < nnodes; i++) {

    for (j = i + 1; j < nnodes; j++) {

      fwd = mstr[CMC(j, i, nnodes) - i - 1];
      bkwd = mstr[CMC(i, j, nnodes) - j];

      mstr[CMC(j, i, nnodes) - i - 1] = mstr[CMC(i, j, nnodes) - j] = fwd + bkwd;

      if (bkwd + fwd > 0) {

      mdir[CMC(j, i, nnodes) - i - 1] = fwd / (fwd + bkwd);
      mdir[CMC(i, j, nnodes) - j] = bkwd / (fwd + bkwd);

      }/*THEN*/
      else {

        mdir[CMC(j, i, nnodes) - i - 1] = mdir[CMC(i, j, nnodes) - j] = 0;

      }/*ELSE*/

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(2);

}/*MEAN_STRENGTH_DIRECTION*/

/* average multiple bn.strength objects, with weights. */
SEXP mean_strength(SEXP strength, SEXP nodes, SEXP weights) {

int nstr = length(weights), ncols = 0, nrows = 0;
double *w = REAL(weights);
const char *m = NULL;
SEXP ref, ref_hash, mean_df, method;

  /* initialize the result using the first bn.strength object as a reference. */
  ref = VECTOR_ELT(strength, 0);
  ncols = length(ref);
  nrows = length(VECTOR_ELT(ref, 0));

  PROTECT(mean_df = allocVector(VECSXP, ncols));
  setAttrib(mean_df, R_NamesSymbol, getAttrib(ref, R_NamesSymbol));
  SET_VECTOR_ELT(mean_df, 0, VECTOR_ELT(ref, 0));
  SET_VECTOR_ELT(mean_df, 1, VECTOR_ELT(ref, 1));
  /* make it a data frame */
  minimal_data_frame(mean_df);

  /* compute the arc IDs to match arcs of later bn.strength objects. */
  PROTECT(ref_hash = arc_hash(ref, nodes, FALSE, FALSE));

  /* switch backend according to how the strengths were computed. */
  method = getAttrib(ref, BN_MethodSymbol);
  m = CHAR(STRING_ELT(method, 0));

  if ((strcmp(m, "score") == 0) || (strcmp(m, "test") == 0))
    mean_strength_overall(&mean_df, strength, nodes, nrows, nstr, ref_hash, w);
  else if (strcmp(m, "bootstrap") == 0)
    mean_strength_direction(&mean_df, strength, nodes, nrows, nstr, ref_hash, w);

  UNPROTECT(2);

  return mean_df;

}/*MEAN_STRENGTH*/
Beispiel #4
0
/* remove one variable in each highly-correlated pair. */
SEXP dedup (SEXP data, SEXP threshold, SEXP complete, SEXP debug) {

int i = 0, j = 0, k = 0, dropped = 0, nc = 0;
int debuglevel = isTRUE(debug);
double *mean = NULL, *sse = NULL, *xx = NULL, *yy = NULL;
double cur_mean[2], cur_sse[2];
double tol = MACHINE_TOL, t = NUM(threshold);
long double sum = 0;
SEXP result, colnames;
gdata dt = { 0 };

  /* extract the columns from the data frame. */
  dt = gdata_from_SEXP(data, 0);
  meta_init_flags(&(dt.m), 0, complete, R_NilValue);
  meta_copy_names(&(dt.m), 0, data);
  /* set up the vectors for the pairwise complete observations. */
  xx = Calloc1D(dt.m.nobs, sizeof(double));
  yy = Calloc1D(dt.m.nobs, sizeof(double));

  if (debuglevel > 0)
    Rprintf("* caching means and variances.\n");

  mean = Calloc1D(dt.m.ncols, sizeof(double));
  sse = Calloc1D(dt.m.ncols, sizeof(double));

  /* cache the mean and variance of complete variables. */
  for (j = 0; j < dt.m.ncols; j++) {

    if (!dt.m.flag[j].complete)
      continue;

    mean[j] = c_mean(dt.col[j], dt.m.nobs);
    sse[j] = c_sse(dt.col[j], mean[j], dt.m.nobs);

  }/*FOR*/

  /* main loop. */
  for (j = 0; j < dt.m.ncols - 1; j++) {

    /* skip variables already flagged for removal. */
    if (dt.m.flag[j].drop)
      continue;

    if (debuglevel > 0)
      Rprintf("* looking at %s with %d variables still to check.\n",
        dt.m.names[j], dt.m.ncols - (j + 1));

    for (k = j + 1; k < dt.m.ncols; k++) {

      /* skip variables already flagged for removal. */
      if (dt.m.flag[k].drop)
        continue;

      if (dt.m.flag[j].complete && dt.m.flag[k].complete) {

        /* use the cached means and variances. */
        cur_mean[0] = mean[j];
        cur_mean[1] = mean[k];
        cur_sse[0] = sse[j];
        cur_sse[1] = sse[k];

        /* compute the covariance. */
        for (i = 0, sum = 0; i < dt.m.nobs; i++)
          sum += (dt.col[j][i] - cur_mean[0]) * (dt.col[k][i] - cur_mean[1]);

      }/*THEN*/
      else {

        for (i = 0, nc = 0; i < dt.m.nobs; i++) {

          if (ISNAN(dt.col[j][i]) || ISNAN(dt.col[k][i]))
            continue;

          xx[nc] = dt.col[j][i];
          yy[nc++] = dt.col[k][i];

        }/*FOR*/


        /* if there are no complete observations, take the variables to be
         * independent. */
        if (nc == 0)
          continue;

        cur_mean[0] = c_mean(xx, nc);
        cur_mean[1] = c_mean(yy, nc);
        cur_sse[0] = c_sse(xx, cur_mean[0], nc);
        cur_sse[1] = c_sse(yy, cur_mean[1], nc);

        /* compute the covariance. */
        for (i = 0, sum = 0; i < nc; i++)
          sum += (xx[i] - cur_mean[0]) * (yy[i] - cur_mean[1]);

      }/*ELSE*/

      /* safety check against "divide by zero" errors. */
      if ((cur_sse[0] < tol) || (cur_sse[1] < tol))
        sum = 0;
      else
        sum /= sqrt(cur_sse[0] * cur_sse[1]);

      /* test the correlation against the threshold. */
      if (fabsl(sum) > t) {

        if (debuglevel > 0)
          Rprintf("%s is collinear with %s, dropping %s with COR = %.4Lf\n",
            dt.m.names[j], dt.m.names[k], dt.m.names[k], sum);

        /* flag the variable for removal. */
        dt.m.flag[k].drop = TRUE;
        dropped++;

      }/*THEN*/

    }/*FOR*/

  }/*FOR*/

  /* set up the return value. */
  PROTECT(result = allocVector(VECSXP, dt.m.ncols - dropped));
  PROTECT(colnames = allocVector(STRSXP, dt.m.ncols - dropped));

  for (j = 0, k = 0; j < dt.m.ncols; j++)
    if (!dt.m.flag[j].drop) {

      SET_STRING_ELT(colnames, k, mkChar(dt.m.names[j]));
      SET_VECTOR_ELT(result, k++, VECTOR_ELT(data, j));

    }/*THEN*/

  setAttrib(result, R_NamesSymbol, colnames);

  /* make it a data frame. */
  minimal_data_frame(result);

  Free1D(mean);
  Free1D(sse);
  Free1D(xx);
  Free1D(yy);
  FreeGDT(dt, FALSE);

  UNPROTECT(2);

  return result;

}/*DEDUP*/
Beispiel #5
0
SEXP rbn_expand_fix(SEXP fix, SEXP nodes, int *nnodes) {

int i = 0, *f = NULL;
SEXP result, fixed_nodes, try;

  /* get the names of the nodes that are fixed. */
  fixed_nodes = getAttrib(fix, R_NamesSymbol);
  /* match the names with the nodes in the network. */
  PROTECT(try = match(nodes, fixed_nodes, 0));
  f = INTEGER(try);
  /* allocate the return value. */
  PROTECT(result = allocVector(VECSXP, *nnodes));
  setAttrib(result, R_NamesSymbol, nodes);

  for (i = 0; i < LENGTH(fixed_nodes); i++)
    SET_VECTOR_ELT(result, f[i] - 1, VECTOR_ELT(fix, i));

  UNPROTECT(2);
  return result;

}/*RBN_EXPAND_FIX*/

/* generate random observations from a bayesian network. */
SEXP rbn_master(SEXP fitted, SEXP n, SEXP fix, SEXP debug) {

int *num = INTEGER(n), *poset = NULL, *debuglevel = LOGICAL(debug), type = 0;
int has_fixed = (TYPEOF(fix) != LGLSXP);
int i = 0, k = 0, cur = 0, nnodes = LENGTH(fitted), nparents = 0;
const char *cur_class = NULL;
SEXP result, nodes, roots, node_depth, cpt, coefs, sd, parents, parent_vars, false;
SEXP cur_node, cur_fixed;

  /* set up a logical variable to be used in the following calls. */
  PROTECT(false = allocVector(LGLSXP, 1));
  LOGICAL(false)[0] = FALSE;

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(VECSXP, nnodes));
  nodes = getAttrib(fitted, R_NamesSymbol);
  setAttrib(result, R_NamesSymbol, nodes);

  /* order the nodes according to their depth in the graph. */
  PROTECT(roots = root_nodes(fitted, false));
  PROTECT(node_depth = schedule(fitted, roots, false, false));
  poset = alloc1dcont(nnodes);
  for (i = 0; i < nnodes; i++)
    poset[i] = i;
  R_qsort_int_I(INTEGER(node_depth), poset, 1, nnodes);

  /* unprotect roots and node_depth, they are not needed any more. */
  UNPROTECT(2);

  if (has_fixed)
    PROTECT(fix = rbn_expand_fix(fix, nodes, &nnodes));

  if (*debuglevel > 0) {

    Rprintf("* partial node ordering is:");

    for (i = 0; i < nnodes; i++)
      Rprintf(" %s", NODE(poset[i]));

    Rprintf(".\n");

  }/*THEN*/

  /* initialize the random number generator. */
  GetRNGstate();

  for (i = 0; i < nnodes; i++) {

    /* get the index of the node we have to generate random observations from,
     * its conditional probability table/regression parameters and the number
     * of its parents. */
    cur = poset[i];
    cur_node = VECTOR_ELT(fitted, cur);
    cur_class = CHAR(STRING_ELT(getAttrib(cur_node, R_ClassSymbol), 0));
    parents = getListElement(cur_node, "parents");
    nparents = LENGTH(parents);

    /* check whether the value of the node is fixed, and if so retrieve it from 
     * the list. */
    if (has_fixed)
      cur_fixed = VECTOR_ELT(fix, cur);
    else
      cur_fixed = R_NilValue;

    /* find out whether the node corresponds to an ordered factor or not. */
    if (strcmp(cur_class, "bn.fit.onode") == 0) {

      cpt = getListElement(cur_node, "prob");
      type = ORDINAL;

    }/*THEN*/
    else if (strcmp(cur_class, "bn.fit.dnode") == 0) {

      cpt = getListElement(cur_node, "prob");
      type = CATEGORICAL;

    }/*THEN*/
    else if (strcmp(cur_class, "bn.fit.gnode") == 0) {

      coefs = getListElement(cur_node, "coefficients");
      sd = getListElement(cur_node, "sd");
      type = GAUSSIAN;

    }/*THEN*/

    /* generate the random observations for the current node. */
    if (nparents == 0) {

      if (*debuglevel > 0) {

        if (cur_fixed != R_NilValue)
          Rprintf("* node %s is fixed.\n", NODE(cur));
        else
          Rprintf("* simulating node %s, which doesn't have any parent.\n",
            NODE(cur));

      }/*THEN*/

      switch(type) {

        case CATEGORICAL:
          rbn_discrete_root(result, cur, cpt, num, FALSE, cur_fixed);
          break;

        case ORDINAL:
          rbn_discrete_root(result, cur, cpt, num, TRUE, cur_fixed);
          break;

        case GAUSSIAN:
          rbn_gaussian(result, cur, NULL, coefs, sd, num, cur_fixed);
          break;

      }/*SWITCH*/

    }/*THEN*/
    else {

      if (*debuglevel > 0) {

        if (cur_fixed != R_NilValue) {

          Rprintf("* node %s is fixed, ignoring parents.\n", NODE(cur));

        }/*THEN*/
        else {

          Rprintf("* simulating node %s with parents ", NODE(cur));
          for (k = 0; k < nparents - 1; k++)
            Rprintf("%s, ", CHAR(STRING_ELT(parents, k)));
          Rprintf("%s.\n", CHAR(STRING_ELT(parents, nparents - 1)));

        }/*ELSE*/

      }/*THEN*/

      PROTECT(parent_vars = dataframe_column(result, parents, false));

      switch(type) {

        case CATEGORICAL:
          rbn_discrete_cond(result, nodes, cur, parent_vars, cpt, num, FALSE, cur_fixed);
          break;

        case ORDINAL:
          rbn_discrete_cond(result, nodes, cur, parent_vars, cpt, num, TRUE, cur_fixed);
          break;

        case GAUSSIAN:
          rbn_gaussian(result, cur, parent_vars, coefs, sd, num, cur_fixed);
          break;

      }/*SWITCH*/

      UNPROTECT(1);

    }/*ELSE*/

  }/*FOR*/

  PutRNGstate();

  /* add the labels to the return value. */
  minimal_data_frame(result);

  UNPROTECT(2 + has_fixed);
  return result;

}/*RBN_MASTER*/
Beispiel #6
0
/* reduce multiple boostrap strength R objects. */
SEXP bootstrap_reduce(SEXP x) {

int i = 0, j = 0, reps = length(x), nrow = 0;
double *str = NULL, *dir = NULL, *temp = NULL;
SEXP result, df, strength, direction;

  /* allocate return value. */
  PROTECT(result = allocVector(VECSXP, 4));

  /* extract the first data frame from the list. */
  df = VECTOR_ELT(x, 0);
  /* copy data frame column names. */
  setAttrib(result, R_NamesSymbol, getAttrib(df, R_NamesSymbol));
  /* copy the first two columns. */
  SET_VECTOR_ELT(result, 0, VECTOR_ELT(df, 0));
  SET_VECTOR_ELT(result, 1, VECTOR_ELT(df, 1));
  /* get the number of rows. */
  nrow = length(VECTOR_ELT(df, 0));
  /* allocate the remaining two columns. */
  PROTECT(strength = allocVector(REALSXP, nrow));
  str = REAL(strength);
  PROTECT(direction = allocVector(REALSXP, nrow));
  dir = REAL(direction);
  /* just copy over strength and direction. */
  memcpy(str, REAL(VECTOR_ELT(df, 2)), nrow * sizeof(double));
  memcpy(dir, REAL(VECTOR_ELT(df, 3)), nrow * sizeof(double));

  for (i = 1; i < reps; i++) {

    /* extract the data frame from the list. */
    df = VECTOR_ELT(x, i);
    /* accumulate strength. */
    temp = REAL(VECTOR_ELT(df, 2));
    for (j = 0; j < nrow; j++)
      str[j] += temp[j];
    /* accumulate direction. */
    temp = REAL(VECTOR_ELT(df, 3));
    for (j = 0; j < nrow; j++)
      dir[j] += temp[j];

  }/*FOR*/

  /* normalize dividing by the number of data frames. */
  for (j = 0; j < nrow; j++) {

    str[j] /= reps;
    dir[j] /= reps;

  }/*FOR*/

  /* set the last two columns. */
  SET_VECTOR_ELT(result, 2, strength);
  SET_VECTOR_ELT(result, 3, direction);
  /* make the return value a real data frame. */
  minimal_data_frame(result);

  UNPROTECT(3);

  return result;

}/*BOOTSTRAP_REDUCE*/