void build_tau(double **data, double *tau, int *ncols, int *nrows,
    int *imaginary, double *phi) {

int i = 0, j = 0, res_ncols = *ncols + 1;
double temp = 0;
double *mean = NULL, *mat = NULL;

  /* allocate mean vector and covariance matrix. */
  mean = alloc1dreal(*ncols);
  mat = alloc1dreal((*ncols) * (*ncols));

  /* compute the mean values.  */
  for (i = 0; i < *ncols; i++) {

    for (j = 0 ; j < *nrows; j++)
      mean[i] += data[i][j];

    mean[i] /= (*nrows);

  }/*FOR*/

  /* compute the covariance matrix... */
  c_covmat(data, mean, ncols, nrows, mat);
  /* ... multiply it by the phi coefficient... */
  for (i = 0; i < *ncols; i++)
    for (j = 0; j < *ncols; j++)
      mat[CMC(i, j, *ncols)] *= (*phi);

  /* ... compute the pseudoinverse... */
  c_ginv(mat, ncols, mat);

  /* ... and store it in the bottom-right corner of the tau matrix. */
  for (i = 1; i < res_ncols; i++)
    for (j = 1; j < res_ncols; j++)
      tau[CMC(i, j, res_ncols)] = mat[CMC(i - 1, j - 1, *ncols)];

  /* fill the top-right and bottom-left corners. */
  for (i = 1; i < *ncols + 1; i++) {

    temp = 0;

    for (j = 0; j < *ncols; j++)
      temp += mean[j] * mat[CMC(j, i - 1, *ncols)];

    tau[CMC(i, 0, res_ncols)] = tau[CMC(0, i, res_ncols)] = -temp;

  }/*FOR*/

  /* fill the top-left corner. */
  for (i = 1; i < res_ncols; i++)
    tau[CMC(0, 0, res_ncols)] += - mean[i - 1] * tau[CMC(i, 0, res_ncols)];

  tau[CMC(0, 0, res_ncols)] += 1/((double) *imaginary);

  /* perform the final (pseudo)inversion. */
  c_ginv(tau, &res_ncols, tau);

}/*BUILD_TAU*/
示例#2
0
/* shrinked mutual information, to be used for the asymptotic test. */
SEXP shmi(SEXP x, SEXP y, SEXP lx, SEXP ly, SEXP length) {

int i = 0, j = 0, k = 0;
double **n = NULL, *ni = NULL, *nj = NULL;
int *llx = INTEGER(lx), *lly = INTEGER(ly), *num = INTEGER(length);
int *xx = INTEGER(x), *yy = INTEGER(y);
double lambda = 0, target = 1/(double)((*llx) * (*lly));
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc2dreal(*llx, *lly);
  ni = alloc1dreal(*llx);
  nj = alloc1dreal(*lly);

  /* compute the joint frequency of x and y. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1]++;

  }/*FOR*/

  /* estimate the optimal lambda for the data. */
  _mi_lambda((double *)n, &lambda, &target, num, llx, lly, NULL);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
        n[i][j] = lambda * target + (1 - lambda) * n[i][j] / (*num);

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++) {

    ni[i] += n[i][j];
    nj[j] += n[i][j];

  }/*FOR*/

  /* compute the mutual information from the joint and marginal frequencies. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++) {

      if (n[i][j] != 0)
        *res += n[i][j] * log(n[i][j] / (ni[i] * nj[j]));

    }/*FOR*/

  UNPROTECT(1);

  return result;

}/*SHMI*/
/* get the number of parameters of the whole network (discrete case). */
SEXP nparams_dnet(SEXP graph, SEXP data, SEXP real, SEXP debug) {

int i = 0, j = 0, nnodes = 0;
int *index = NULL, *r = LOGICAL(real), *debuglevel = LOGICAL(debug);
double node_params = 0;
double *res = NULL, *nlevels = NULL;
SEXP nodes, node_data, parents, try, result;

  /* get nodes' number and data. */
  node_data = getListElement(graph, "nodes");
  nodes = getAttrib(node_data, R_NamesSymbol);
  nnodes = LENGTH(node_data);

  /* get the level count for each node. */
  nlevels = alloc1dreal(nnodes);
  for (i = 0; i < nnodes; i++)
    nlevels[i] = NLEVELS2(data, i);

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  res[0] = 0;

  /* for each node... */
  for (i = 0; i < nnodes; i++) {

    /* reset the parameter counter. */
    node_params = 1;

    /* match the parents of the node. */
    parents = getListElement(VECTOR_ELT(node_data, i), "parents");
    PROTECT(try = match(nodes, parents, 0));
    index = INTEGER(try);

    /* compute the number of configurations. */
    for (j = 0; j < LENGTH(try); j++)
      node_params *= nlevels[index[j] - 1];

    UNPROTECT(1);

    /* multiply by the number of free parameters. */
    if (*r > 0)
      node_params *= nlevels[i] - 1;
    else
      node_params *= nlevels[i];

    if (*debuglevel > 0)
      Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params);

    /* update the return value. */
    res[0] += node_params;

  }/*FOR*/

  UNPROTECT(1);

  return result;

}/*NPARAMS_DNET*/
示例#4
0
/* unconditional discrete sampling. */
void rbn_discrete_root(SEXP result, int cur, SEXP cpt, int *num, int ordinal,
    SEXP fixed) {

int np = LENGTH(cpt), *gen = NULL, *workplace = NULL;
double *p = NULL;
SEXP generated, class, lvls;

  /* get the levels of the curent variable .*/
  lvls = VECTOR_ELT(getAttrib(cpt, R_DimNamesSymbol), 0);
  /* allocate the memory for the generated observations. */
  PROTECT(generated = allocVector(INTSXP, *num));
  gen = INTEGER(generated);

  if (fixed != R_NilValue) {

    int constant = INTEGER(match(lvls, fixed, 0))[0];

    for (int i = 0; i < *num; i++)
      gen[i] = constant;

  }/*THEN*/
  else {

    workplace = alloc1dcont(np);

    /* duplicate the probability table to save the original copy from tampering. */
    p = alloc1dreal(np);
    memcpy(p, REAL(cpt), np * sizeof(double));

    /* perform the random sampling. */
    ProbSampleReplace(np, p, workplace, *num, gen);

  }/*ELSE*/

  /* set up all the attributes for the newly generated observations. */
  setAttrib(generated, R_LevelsSymbol, lvls);
  if (ordinal) {

    PROTECT(class = allocVector(STRSXP, 2));
    SET_STRING_ELT(class, 0, mkChar("ordered"));
    SET_STRING_ELT(class, 1, mkChar("factor"));

  }/*THEN*/
  else {
示例#5
0
/* predict the value of a discrete node without parents. */
SEXP dpred(SEXP fitted, SEXP data, SEXP debug) {

int i = 0, nmax = 0, ndata = LENGTH(data), length = 0;
int *res = NULL, *debuglevel = LOGICAL(debug), *iscratch = NULL, *maxima = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP ptab, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  ptab = getListElement(fitted, "prob");
  length = LENGTH(ptab);
  prob = REAL(ptab);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(length);
  for (i = 0; i < length; i++)
    iscratch[i] = i + 1;

  /* create a scratch copy of the array. */
  buf = alloc1dreal(length);
  dscratch = alloc1dreal(length);
  memcpy(dscratch, prob, length * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(length);

  /* find out the mode(s). */
  all_max(dscratch, length, maxima, &nmax, iscratch, buf);

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  if (nmax == 1) {

    /* copy the index of the mode in the return value. */
    for (i = 0; i < ndata; i++)
      res[i] = maxima[0];

    if (*debuglevel > 0) {

      if (res[0] == NA_INTEGER)
        Rprintf("  > prediction for all observations is NA with probabilities:\n");
      else
        Rprintf("  > prediction for all observations is '%s' with probabilities:\n",
          CHAR(STRING_ELT(tr_levels, res[0] - 1)));

      Rprintf("  ");
      for (i = 0; i < LENGTH(ptab); i++)
        Rprintf("  %lf", prob[i]);
      Rprintf("\n");

    }/*THEN*/

  }/*THEN*/
  else {

    /* break ties: sample with replacement from all the maxima. */
    GetRNGstate();
    SampleReplace(ndata, nmax, res, maxima);
    PutRNGstate();

    if (*debuglevel > 0) {

      Rprintf("  > there are %d levels tied for prediction, applying tie breaking.\n", nmax);
      Rprintf("  > tied levels are:");
      for (i = 0; i < nmax; i++)
        Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[i] - 1)));
      Rprintf(".\n");

    }/*THEN*/

  }/*ELSE*/

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*DPRED*/
示例#6
0
/* predict the value of the training variable in a naive Bayes or Tree-Augmented
 * naive Bayes classifier. */
SEXP naivepred(SEXP fitted, SEXP data, SEXP parents, SEXP training, SEXP prior,
    SEXP prob, SEXP debug) {

int i = 0, j = 0, k = 0, n = 0, nvars = LENGTH(fitted), nmax = 0, tr_nlevels = 0;
int *res = NULL, **ex = NULL, *ex_nlevels = NULL;
int idx = 0, *tr_id = INTEGER(training);
int *iscratch = NULL, *maxima = NULL, *prn = NULL, *debuglevel = LOGICAL(debug);
int *include_prob = LOGICAL(prob);
double **cpt = NULL, *pr = NULL, *scratch = NULL, *buf = NULL, *pt = NULL;
double sum = 0;
SEXP class, temp, tr, tr_levels, result, nodes, probtab, dimnames;

  /* cache the node labels. */
  nodes = getAttrib(fitted, R_NamesSymbol);

  /* cache the pointers to all the variables. */
  ex = (int **) alloc1dpointer(nvars);
  ex_nlevels = alloc1dcont(nvars);

  for (i = 0; i < nvars; i++) {

    temp = VECTOR_ELT(data, i);
    ex[i] = INTEGER(temp);
    ex_nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  /* get the training variable and its levels. */
  n = LENGTH(VECTOR_ELT(data, 0));
  tr = getListElement(VECTOR_ELT(fitted, *tr_id - 1), "prob");
  tr_levels = VECTOR_ELT(getAttrib(tr, R_DimNamesSymbol), 0);
  tr_nlevels = LENGTH(tr_levels);
  /* get the prior distribution. */
  pr = REAL(prior);

  if (*debuglevel > 0) {

    Rprintf("* the prior distribution for the target variable is:\n");
    PrintValue(prior);

  }/*THEN*/

  /* allocate the scratch space used to compute posterior probabilities. */
  scratch = alloc1dreal(tr_nlevels);
  buf = alloc1dreal(tr_nlevels);

  /* cache the pointers to the conditional probability tables. */
  cpt = (double **) alloc1dpointer(nvars);

  for (i = 0; i < nvars; i++) 
    cpt[i] = REAL(getListElement(VECTOR_ELT(fitted, i), "prob"));

  /* dereference the parents' vector. */
  prn = INTEGER(parents);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(tr_nlevels);

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(tr_nlevels);

  /* allocate the return value. */
  PROTECT(result = allocVector(INTSXP, n));
  res = INTEGER(result);

  /* allocate and initialize the table of the posterior probabilities. */
  if (*include_prob > 0) {

    PROTECT(probtab = allocMatrix(REALSXP, tr_nlevels, n));
    pt = REAL(probtab);
    memset(pt, '\0', n * tr_nlevels * sizeof(double));

  }/*THEN*/

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* for each observation... */
  for (i = 0; i < n; i++) {

    /* ... reset the scratch space and the indexes array... */
    for (k = 0; k < tr_nlevels; k++) {

      scratch[k] = log(pr[k]);
      iscratch[k] = k + 1;

    }/*FOR*/

    if (*debuglevel > 0)
      Rprintf("* predicting the value of observation %d.\n", i + 1);

    /* ... and for each conditional probability table... */
    for (j = 0; j < nvars; j++) {

      /* ... skip the training variable... */
      if (*tr_id == j + 1)
        continue;

      /* ... (this is the root node of the Chow-Liu tree) ... */
      if (prn[j] == NA_INTEGER) {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d) from the CPT (p = %lf).\n",
              NODE(j), CMC(ex[j][i] - 1, k, ex_nlevels[j]), ex[j][i], k + 1,
              cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

        }/*FOR*/

      }/*THEN*/
      else {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          /* (the first dimension corresponds to the current node [X], the second
           * to the training node [Y], the third to the only parent of the current
           * node [Z]; CMC coordinates are computed as X + Y * NX + Z * NX * NY. */
          idx = (ex[j][i] - 1) + k * ex_nlevels[j] + 
                  (ex[prn[j] - 1][i] - 1) * ex_nlevels[j] * tr_nlevels;

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d, %d) from the CPT (p = %lf).\n",
              NODE(j), idx, ex[j][i], k + 1, ex[prn[j] - 1][i], cpt[j][idx]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][idx]);

        }/*FOR*/

      }/*ELSE*/

    }/*FOR*/

    /* find out the mode(s). */
    all_max(scratch, tr_nlevels, maxima, &nmax, iscratch, buf);

    /* compute the posterior probabilities on the right scale, to attach them
     * to the return value. */
    if (*include_prob) {

      /* copy the log-probabilities from scratch. */
      memcpy(pt + i * tr_nlevels, scratch, tr_nlevels * sizeof(double));

      /* transform log-probabilitiees into plain probabilities. */
      for (k = 0, sum = 0; k < tr_nlevels; k++)
        sum += pt[i * tr_nlevels + k] = exp(pt[i * tr_nlevels + k] - scratch[maxima[0] - 1]);

      /* rescale them to sum up to 1. */
      for (k = 0; k < tr_nlevels; k++)
        pt[i * tr_nlevels + k] /= sum;

    }/*THEN*/

    if (nmax == 1) {

      res[i] = maxima[0];

      if (*debuglevel > 0) {

        Rprintf("  @ prediction for observation %d is '%s' with (log-)posterior:\n",
          i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax, res + i, maxima);

      if (*debuglevel > 0) {

        Rprintf("  @ there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax, i + 1);

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

        Rprintf("  @ tied levels are:");
        for (k = 0; k < nmax; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[k] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* add back the attributes and the class to the return value. */
  PROTECT(class = allocVector(STRSXP, 1));
  SET_STRING_ELT(class, 0, mkChar("factor"));
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, class);

  if (*include_prob > 0) {

    /* set the levels of the taregt variable as rownames. */
    PROTECT(dimnames = allocVector(VECSXP, 2));
    SET_VECTOR_ELT(dimnames, 0, tr_levels);
    setAttrib(probtab, R_DimNamesSymbol, dimnames);
    /* add the posterior probabilities to the return value. */
    setAttrib(result, install("prob"), probtab);

    UNPROTECT(4);

  }/*THEN*/
  else {

    UNPROTECT(2);

  }/*ELSE*/

  return result;

}/*NAIVEPRED*/
示例#7
0
/* predict the value of a discrete node with one or more parents. */
SEXP cdpred(SEXP fitted, SEXP data, SEXP parents, SEXP debug) {

int i = 0, k = 0, ndata = LENGTH(data), nrows = 0, ncols = 0;
int *configs = INTEGER(parents), *debuglevel = LOGICAL(debug);
int *iscratch = NULL, *maxima = NULL, *nmax = NULL, *res = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP temp, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  temp = getListElement(fitted, "prob");
  nrows = INT(getAttrib(temp, R_DimSymbol));
  ncols = LENGTH(temp) / nrows;
  prob = REAL(temp);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(nrows);

  /* create a scratch copy of the array. */
  buf = alloc1dreal(nrows);
  dscratch = alloc1dreal(nrows * ncols);
  memcpy(dscratch, prob, nrows * ncols * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(nrows * ncols);

  /* allocate the maxima counters. */
  nmax = alloc1dcont(ncols);

  /* get the mode for each configuration. */
  for (i = 0; i < ncols; i++) {

    /* initialize the vector of indexes. */
    for (k = 0; k < nrows; k++)
      iscratch[k] = k + 1;

    /* find out the mode(s). */
    all_max(dscratch + i * nrows, nrows, maxima + i * nrows,
      nmax + i, iscratch, buf);

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* copy the index of the mode in the return value. */
  for (i = 0; i < ndata; i++) {

    if (nmax[configs[i] - 1] == 1) {

      res[i] = maxima[CMC(0, configs[i] - 1, nrows)];

      if (*debuglevel > 0) {

        if (res[i] == NA_INTEGER)
          Rprintf("  > prediction for observation %d is NA with probabilities:\n");
        else
          Rprintf("  > prediction for observation %d is '%s' with probabilities:\n",
            i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (int k = 0; k < nrows; k++)
          Rprintf("  %lf", (prob + nrows * (configs[i] - 1))[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax[configs[i] - 1], res + i, maxima + (configs[i] - 1) * nrows);

      if (*debuglevel > 0) {

        Rprintf("  > there are %d levels tied for prediction of observation %d, applying tie breaking.\n",
          nmax[configs[i] - 1], i + 1);
        Rprintf("  > tied levels are:");
        for (k = 0; k < nmax[configs[i] - 1]; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[CMC(k, configs[i] - 1, nrows)] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*CDPRED*/
示例#8
0
/* shrinked conditional mutual information, to be used for the asymptotic
 * test. */
SEXP shcmi(SEXP x, SEXP y, SEXP z, SEXP lx, SEXP ly, SEXP lz, SEXP length) {

int i = 0, j = 0, k = 0;
double ***n = NULL, **ni = NULL, **nj = NULL, *nk = NULL;
int *llx = INTEGER(lx), *lly = INTEGER(ly), *llz = INTEGER(lz);
int *num = INTEGER(length);
int *xx = INTEGER(x), *yy = INTEGER(y), *zz = INTEGER(z);
double lambda = 0, target = 1/(double)((*llx) * (*lly) * (*llz));
double *res = NULL;
SEXP result;

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the contingency table and the marginal frequencies. */
  n = alloc3dreal(*llx, *lly, *llz);
  ni = alloc2dreal(*llx, *llz);
  nj = alloc2dreal(*lly, *llz);
  nk = alloc1dreal(*llz);

  /* compute the joint frequency of x, y, and z. */
  for (k = 0; k < *num; k++) {

    n[xx[k] - 1][yy[k] - 1][zz[k] - 1]++;

  }/*FOR*/

  /* estimate the optimal lambda for the data. */
  _mi_lambda((double *)n, &lambda, &target, num, llx, lly, llz);

  /* switch to the probability scale and shrink the estimates. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++)
        n[i][j][k] = lambda * target + (1 - lambda) * n[i][j][k] / (*num);

  /* compute the marginals. */
  for (i = 0; i < *llx; i++)
    for (j = 0; j < *lly; j++)
      for (k = 0; k < *llz; k++) {

        ni[i][k] += n[i][j][k];
        nj[j][k] += n[i][j][k];
        nk[k] += n[i][j][k];

      }/*FOR*/

  for (k = 0; k < *llz; k++) {

    /* check each level of the conditioning variable to avoid (again)
     * "divide by zero" errors. */
    if (nk[k] == 0)
      continue;

    for (j = 0; j < *lly; j++) {

      for (i = 0; i < *llx; i++) {

        if (n[i][j][k] > 0)
          *res += n[i][j][k] * log( (n[i][j][k] * nk[k]) / (ni[i][k] * nj[j][k]) );

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  UNPROTECT(1);

  return result;

}/*SHCMI*/
示例#9
0
/* Shrinked Covariance Matrix. */
SEXP cov_lambda(SEXP data, SEXP length) {

int i = 0, j = 0, k = 0, cur = 0;
int *n = INTEGER(length), ncols = LENGTH(data);
double *mean = NULL, *var = NULL, **column = NULL;
double lambda = 0, sumcors = 0, sumvars = 0;
SEXP res;

  /* allocate the covariance matrix. */
  PROTECT(res = allocMatrix(REALSXP, ncols, ncols));
  var = REAL(res);
  memset(var, '\0', ncols * ncols * sizeof(double));

  /* allocate an array to store the mean values. */
  mean = alloc1dreal(ncols);

  /* allocate and initialize an array of pointers for the variables. */
  column = (double **) alloc1dpointer(ncols);
  for (i = 0; i < ncols; i++)
    column[i] = REAL(VECTOR_ELT(data, i));

  /* compute the mean values  */
  for (i = 0; i < ncols; i++) {

    for (j = 0 ; j < *n; j++)
      mean[i] += column[i][j];

    mean[i] /= (*n);

  }/*FOR*/

  for (i = 0; i < ncols; i++) {

    for (j = i; j < ncols; j++) {

      cur = CMC(i, j, ncols);

      /* compute the actual variance/covariance. */
      for (k = 0; k < *n; k++)
        var[cur] += (column[i][k] - mean[i]) * (column[j][k] - mean[j]);

      if (i != j) {

        /* do the first round of computations for the shrinkage intensity. */
        for (k = 0; k < *n; k++) {

          sumvars +=
            ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n)) *
            ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n));

        }/*FOR*/

        sumcors += (var[cur] / (*n - 1)) * (var[cur] / (*n - 1));

      }/*THEN*/

      /* use the unbiased estimator for variances/covariances. */
      var[cur] /= (*n) - 1;

      /* fill in the symmetric element of the matrix. */
      var[CMC(j, i, ncols)] = var[cur];

    }/*FOR*/

  }/*FOR*/

  /* wrap up the computation of the shrinkage intensity. */
  lambda = sumvars * (*n) / (*n - 1) / (*n -1) / (*n -1) / sumcors;

  /* truncate the shrinkage intensity in the [0,1] interval; this is not an
   * error, but a measure to increase the quality of the shrinked estimate. */
  if (lambda > 1) {

    lambda = 1;

  }/*THEN*/
  else if (lambda < 0) {

    lambda = 0;

  }/*THEN*/

  /* shrink the covariance matrix (except the diagonal, which stays the same). */
  for (i = 0; i < ncols; i++)
    for (j = 0; j < ncols; j++)
      if (i != j)
        var[CMC(i, j, ncols)] *= 1 - lambda;

  UNPROTECT(1);
  return res;

}/*COV_LAMBDA*/
示例#10
0
/* predict the values of one or more variables given one or more variables by
 * maximum a posteriori (MAP). */
SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) {

int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0;
int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug);
void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL;
SEXP result, colnames, evidence, evmatch, temp = R_NilValue;
SEXP cpdist, predicted, lvls = R_NilValue;
double *wgt = NULL;
long double *lvls_counts = NULL;

  /* extract the names of the variables in the data. */
  colnames = getAttrib(data, R_NamesSymbol);

  /* remove the name of the variable to predict. */
  nev = length(from);
  PROTECT(evmatch = match(colnames, from, 0));

  /* cache variable types and pointers. */
  vartypes = alloc1dcont(nev);
  varptrs = alloc1dpointer(nev);
  for (j = 0, k = 0; j < nev; j++) {

    temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1);
    vartypes[k] = TYPEOF(temp);
    varptrs[k++] = DATAPTR(temp);

  }/*FOR*/

  /* cache the sample size. */
  nobs = length(temp);

  /* allocate a list to hold the evidence. */
  PROTECT(evidence = allocVector(VECSXP, nev));
  setAttrib(evidence, R_NamesSymbol, from);

  /* cache pointers to the elements of the evidence .*/
  evptrs = alloc1dpointer(nev);

  for (j = 0; j < nev; j++) {

    PROTECT(temp = allocVector(vartypes[j], 1));
    evptrs[j] = DATAPTR(temp);
    SET_VECTOR_ELT(evidence, j, temp);
    UNPROTECT(1);

  }/*FOR*/

  /* make the evidence a data frame to compact debugging output. */
  minimal_data_frame(evidence);

  /* allocate the return value. */
  PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs));
  res = DATAPTR(result);

  /* in the case of discrete variables, allocate scratch space for levels'
   * frequencies. */
  if (TYPEOF(result) == INTSXP) {

    lvls = getAttrib(result, R_LevelsSymbol);
    nlvls = length(lvls);
    lvls_counts = allocldouble(nlvls);

  }/*THEN*/

  /* allocate the weights. */
  wgt = alloc1dreal(nsims);

  /* allocate sratch space for the random samplings. */
  PROTECT(cpdist = fit2df(fitted, nsims));
  predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0)));
  pred = DATAPTR(predicted);

  /* iterate over the observations. */
  for (i = 0; i < nobs; i++) {

    /* copy the values into the list. */
    for (j = 0; j < nev; j++) {

      switch(vartypes[j]) {

        case REALSXP:

          *((double *)evptrs[j]) = ((double *)varptrs[j])[i];
          break;

        case INTSXP:

          *((int *)evptrs[j]) = ((int *)varptrs[j])[i];
          break;

      }/*SWITCH*/

    }/*FOR*/

    if (debuglevel > 0) {

      Rprintf("* predicting observation %d conditional on:\n", i);
      PrintValue(evidence);

    }/*THEN*/

    /* generate samples from the conditional posterior distribution. */
    c_rbn_master(fitted, cpdist, n, evidence, FALSE);
    /* compute the weights. */
    c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE);

    /* compute the posterior estimate. */
    switch(TYPEOF(predicted)) {

      case REALSXP:

        /* average the predicted values. */
        ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims,
                               debuglevel);
        break;

      case INTSXP:

        /* pick the most frequent value. */
        ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts,
                            lvls, nlvls, debuglevel);
        break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(4);

  return result;

}/*MAPPRED*/
示例#11
0
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncols = LENGTH(data);
int num = LENGTH(VECTOR_ELT(data, i)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, *clevels = NULL, *est = INTEGER(estimator);
int *wl = NULL, *bl = NULL, *poset = NULL, *debuglevel = LOGICAL(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = alloc1dcont(1);
    *clevels = NLEVELS(conditional); 

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = alloc1dreal(UPTRI3_MATRIX(ncols));
  include = allocstatus(UPTRI3_MATRIX(ncols));

  /* compute the pairwise mutual information coefficients. */
  if (*debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncols, nlevels, &num, cond, clevels, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = LENGTH(wlist);

    for (i = 0; i < nwl; i++) {

      if (*debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = LENGTH(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = alloc1dcont(UPTRI3_MATRIX(ncols));
  for (i = 0; i < UPTRI3_MATRIX(ncols); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncols));

  for (i = UPTRI3_MATRIX(ncols) - 1; i > 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncols, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncols - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (*debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, debug_coord[0], debug_coord[1], ncols, nodes, FALSE)) {

      if (*debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (*debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncols - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncols - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncols - 1));

  return arcs;

}/*CHOW_LIU*/
示例#12
0
/* ARACNE structure learning algorithm. */
SEXP aracne(SEXP data, SEXP estimator, SEXP whitelist, SEXP blacklist, SEXP debug) {

int i = 0, j = 0, k = 0, coord = 0, ncols = LENGTH(data);
int num = LENGTH(VECTOR_ELT(data, i)), narcs = ncols * (ncols - 1) / 2;
int *nlevels = NULL, *est = INTEGER(estimator), *wl = NULL, *bl = NULL;
int *debuglevel = LOGICAL(debug);
void **columns = NULL;
short int *exclude = NULL;
double *mim = NULL;
SEXP arcs, nodes, wlist, blist;

  nodes = getAttrib(data, R_NamesSymbol);

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* allocate the mutual information matrix and the status vector. */
  mim = alloc1dreal(UPTRI3_MATRIX(ncols));
  exclude = allocstatus(UPTRI3_MATRIX(ncols));

  /* compute the pairwise mutual information coefficients. */
  if (*debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncols, nlevels, &num, NULL, NULL, est);

  LIST_MUTUAL_INFORMATION_COEFS()

  /* compare all the triplets. */
  for (i = 0; i < ncols; i++) {

    for (j = i + 1; j < ncols; j++) {

      for (k = 0; k < ncols; k++) {

        if ((k == i) || (k == j))
          continue;

        /* cache the UPTRI3 coordinates of the arc. */
        coord = UPTRI3(i + 1, j + 1, ncols);

        /* if MI(X, Y) < min(MI(X, Z), MI(Z, Y)) drop arc X - Y. */
        if ((mim[coord] < mim[UPTRI3(i + 1, k + 1, ncols)]) &&
            (mim[coord] < mim[UPTRI3(j + 1, k + 1, ncols)])) {

          if (*debuglevel > 0) {

            Rprintf("* dropping arc %s - %s because of %s, %lf < min(%lf, %lf)\n",
              NODE(i), NODE(j), NODE(k), mim[UPTRI3(i + 1, j + 1, ncols)],
              mim[UPTRI3(i + 1, k + 1, ncols)], mim[UPTRI3(j + 1, k + 1, ncols)]);

          }/*THEN*/

          /* update the status vector. */
          exclude[coord] = 1;
          /* decrement the number of arcs. */
          narcs--;

          break;

        }/*THEN*/

      }/*FOR*/

    }/*FOR*/

  }/*FOR*/

  /* add back whitelisted arcs. */
  if ((!isNull(whitelist)) && (LENGTH(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);

    for (i = 0; i < LENGTH(wlist); i++) {

      if (*debuglevel > 0) {

        Rprintf("* adding back whitelisted arcs.\n");

        if (exclude[wl[i]] == 1) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + LENGTH(wlist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[wl[i]] == 1)
        narcs++;
      /* include the arc in the graph. */
      exclude[wl[i]] = 0;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* remove blacklisted arcs. */
  if ((!isNull(blacklist)) && (LENGTH(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);

    for (i = 0; i < LENGTH(blist); i++) {

      if (*debuglevel > 0) {

        Rprintf("* removing blacklisted arcs.\n");

        if (exclude[bl[i]] == 0) {

          Rprintf("  > arc %s - %s has been dropped from the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist))));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was not present in the graph.\n",
            CHAR(STRING_ELT(blacklist, i)), CHAR(STRING_ELT(blacklist, i + LENGTH(blist))));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (exclude[bl[i]] == 0)
        narcs--;
      /* remove the arc from the graph. */
      exclude[bl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  CONVERT_TO_ARC_SET(exclude, 1, 2 * narcs);

  return arcs;

}/*ARACNE*/
示例#13
0
/* conditional Monte Carlo simulation for correlation-based tests. */
SEXP gauss_cmcarlo(SEXP data, SEXP length, SEXP samples, SEXP test, SEXP alpha) {

int j = 0, k = 0, ncols = LENGTH(data), errcode = 0, *work = NULL, *perm = NULL;
int error_counter = 0, *B = INTEGER(samples), *num = INTEGER(length);
double observed = 0, permuted = 0, *yperm = NULL, *yorig = NULL, *res = NULL;
double enough = ceil(NUM(alpha) * (*B)) + 1;
double **column = NULL, *mean = NULL, *covariance = NULL, *covariance_backup = NULL;
double *u = NULL, *d = NULL, *vt = NULL;
SEXP result;

  /* allocate the matrices needed for the SVD decomposition. */
  u = alloc1dreal(ncols * ncols);
  d = alloc1dreal(ncols);
  vt = alloc1dreal(ncols * ncols);

  /* allocate and initialize the result. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* allocate and initialize an array of pointers for the variables. */
  column = (double **) alloc1dpointer(ncols);
  for (j = 0; j < ncols; j++)
    column[j] = REAL(VECTOR_ELT(data, j));

  /* cache the means of the variables (they are invariant under permutation). */
  mean = alloc1dreal(ncols);

  /* compute the mean values  */
  for (j = 0; j < ncols; j++) {

    for (k = 0 ; k < *num; k++)
      mean[j] += column[j][k];

    mean[j] /= (*num);

  }/*FOR*/

  /* allocate and initialize the covariance matrix. */
  covariance = alloc1dreal(ncols * ncols);
  covariance_backup = alloc1dreal(ncols * ncols);
  c_covmat(column, mean, &ncols, num, covariance);
  memcpy(covariance_backup, covariance, ncols * ncols * sizeof(double));

  /* substitute the original data with the fake column that will be permuted. */
  yperm = alloc1dreal(*num);
  yorig = column[1];
  memcpy(yperm, yorig, *num * sizeof(double));
  column[1] = yperm;

   /* allocate the arrays needed by RandomPermutation. */
  perm = alloc1dcont(*num);
  work = alloc1dcont(*num);

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random permutations (all variable but the second are fixed) and check how
     many tests are greater (in absolute value) than the original one.*/
  switch(INT(test)) {

    case GAUSSIAN_MUTUAL_INFORMATION:
    case LINEAR_CORRELATION:
    case FISHER_Z:
      observed = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

      if (errcode)
        error("an error (%d) occurred in the call to dgesvd().\n", errcode);

      for (j = 0; j < (*B); j++) {

        /* reset the error flag of the SVD Fortran routine. */
        errcode = 0;

        RandomPermutation(*num, perm, work);

        for (k = 0; k < *num; k++)
          yperm[k] = yorig[perm[k]];

        /* restore the covariance matrix from the good copy. */
        memcpy(covariance, covariance_backup, ncols * ncols * sizeof(double));
        /* update the relevant covariances. */
        c_update_covmat(column, mean, 1, &ncols, num, covariance);

        permuted = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

        if (errcode != 0)
          error_counter++;

        if (fabs(permuted) > fabs(observed)) {

          sequential_counter_check(*res);

        }/*THEN*/

      }/*FOR*/

    if (error_counter > 0)
      warning("unable to compute %d permutations due to errors in dgesvd().\n",
        error_counter);

    break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed p-value. */
  *res /= *B;

  UNPROTECT(1);

  return result;

}/*GAUSS_CMCARLO*/
示例#14
0
/* unconditional Monte Carlo simulation for correlation-based tests. */
SEXP gauss_mcarlo(SEXP x, SEXP y, SEXP samples, SEXP test, SEXP alpha) {

int j = 0, k = 0, num = LENGTH(x), *B = INTEGER(samples);
double *xx = REAL(x), *yy = REAL(y), *yperm = NULL, *res = NULL;
double observed = 0, enough = ceil(NUM(alpha) * (*B)) + 1, xm = 0, ym = 0;
int *perm = NULL, *work = NULL;
SEXP result;

  /* allocate the arrays needed by RandomPermutation. */
  perm = alloc1dcont(num);
  work = alloc1dcont(num);

  /* allocate the array for the pemutations. */
  yperm = alloc1dreal(num);

  /* cache the means of the two variables (they are invariant under permutation). */
  for (j = 0; j < num; j++) {

    xm += xx[j];
    ym += yy[j];

  }/*FOR*/

  xm /= num;
  ym /= num;

  /* allocate the result. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random permutations (all variable but the second are fixed) and check how
     many tests are greater (in absolute value) than the original one.*/
  switch(INT(test)) {

    case GAUSSIAN_MUTUAL_INFORMATION:
    case LINEAR_CORRELATION:
    case FISHER_Z:
      observed = _cov(xx, yy, &xm, &ym, &num);

      for (j = 0; j < *B; j++) {

        RandomPermutation(num, perm, work);

        for (k = 0; k < num; k++)
          yperm[k] = yy[perm[k]];

        if (fabs(_cov(xx, yperm, &xm, &ym, &num)) > fabs(observed)) {

          sequential_counter_check(*res);

        }/*THEN*/

      }/*FOR*/

    break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed p-value. */
  *res /= *B;

  UNPROTECT(1);

  return result;

}/*GAUSS_MCARLO*/
示例#15
0
SEXP cwpost(SEXP x, SEXP z, SEXP imaginary, SEXP phi_coef) {

int i = 0, j = 0, k = 0;
int ncols = LENGTH(z), num = LENGTH(x), tau_ncols = LENGTH(z) + 1;
int *iss = INTEGER(imaginary), rho = *iss + ncols;
double logscale = 0, logk = 0, xprod = 0, var_x = 0, zi_mu = 0, phi = 0;
double *xx = REAL(x), *phic = REAL(phi_coef), *workspace = NULL;
double *res = NULL, **zz = NULL, *zi = NULL, *mu = NULL, *delta_mu = NULL;
double *tau = NULL, *invtau = NULL, *old_tau = NULL, *old_mu = NULL;
SEXP result;

  /* allocate a workspace vector. */
  workspace = alloc1dreal(tau_ncols);  

  /* allocate and initialize the parent configuration. */
  zi = alloc1dreal(ncols + 1);
  zi[0] = 1;

  /* estimate mu and var_x. */
  mu = alloc1dreal(tau_ncols);
  old_mu = alloc1dreal(tau_ncols);
  delta_mu = alloc1dreal(tau_ncols);

  for (i = 0; i < num; i++)
    mu[0] += xx[i];
  mu[0] /= num;

  for (i = 0; i < num; i++)
    var_x += (xx[i] - mu[0]) * (xx[i] - mu[0]);
  var_x /= num - 1;

  /* initialize phi. */
  phi = var_x * (*phic);

  /* allocate and initialize an array of pointers for the variables. */
  zz = (double **) alloc1dpointer(ncols);
  for (j = 0; j < ncols; j++)
    zz[j] = REAL(VECTOR_ELT(z, j));

  /* allocate and initialize tau. */
  tau = alloc1dreal(tau_ncols * tau_ncols);  
  old_tau = alloc1dreal(tau_ncols * tau_ncols);  
  invtau = alloc1dreal(tau_ncols * tau_ncols);  
  build_tau(zz, tau, &ncols, &num, iss, phic);
  memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double));
  c_ginv(tau, &tau_ncols, invtau);

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* for each sample... */
  for (i = 0; i < num; i++) {

    /* ... extract the values of the parents ... */
    for (j = 0; j < ncols; j++)
      zi[j + 1] = zz[j][i];

    /* ... compute the Mahalanobis distance of z[i] ... */
    xprod = c_quadratic(zi, &tau_ncols, invtau, zi, workspace);

    /* ... compute the scale factor ... */
    logscale = log(phi) + log1p(xprod);
    logk = lgammafn(0.5 * (1 + rho)) - lgammafn(0.5 * rho);
    logk -= 0.5 * (logscale + log(M_PI));

    /* and then the score for the variable. */
    for (j = 0, zi_mu = 0; j < tau_ncols; j++)
      zi_mu += zi[j] * mu[j];

    *res += logk - 0.5 * (1 + rho) * 
             log1p((xx[i] - zi_mu) * (xx[i] - zi_mu) / exp(logscale));

    /* For the next iteration, update the tau matrix ... */
    memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double));

    for (j = 0; j < tau_ncols; j++)
      for (k = j; k < tau_ncols; k++)
        tau[CMC(j, k, tau_ncols)] = tau[CMC(k, j, tau_ncols)] = 
          tau[CMC(j, k, tau_ncols)] + zi[j] * zi[k];

    /* ... its inverse  ... */
    c_finv(tau, &tau_ncols, invtau);

    /* ... update the mu vector ... */
    memcpy(old_mu, mu, tau_ncols * sizeof(double));
    c_rotate(invtau, old_tau, mu, &(xx[i]), zi, &tau_ncols, workspace);

    /* ... update rho (ISS + sample size evaluated at the current iteration) ... */
    rho++;

    /* ... and update phi. */
    for (j = 0; j < tau_ncols; j++)
      delta_mu[j] = old_mu[j] - mu[j];
    for (j = 0, zi_mu = 0; j < tau_ncols; j++)
      zi_mu += zi[j] * mu[j];

    phi += (xx[i] - zi_mu) * xx[i] +
             c_quadratic(delta_mu, &tau_ncols, old_tau, old_mu, workspace); 

  }/*FOR*/

  UNPROTECT(1);
  return result;

}/*CWPOST*/
示例#16
0
SEXP hc_to_be_added(SEXP arcs, SEXP blacklist, SEXP whitelist, SEXP nparents,
    SEXP maxp, SEXP nodes, SEXP convert) {

int i = 0, j = 0, narcs = 0, dims = length(nodes);
int *a = NULL, *coords = NULL;
double *mp = REAL(maxp), *np = NULL;
short int duplicated = 0;
SEXP try, result = R_NilValue, result2;

  /* transform the arc set into an adjacency matrix, if it's not one already. */
  if (isInteger(arcs)) {

    if ((duplicated = NAMED(arcs)) > 0)
      PROTECT(result = duplicate(arcs));

  }/*THEN*/
  else {

    PROTECT(result = arcs2amat(arcs, nodes));

  }/*ELSE*/

  /* dereference the adjacency matrix once and for all. */
  a = INTEGER(result);

  /* compute the number the parents of each node, unless provided. */
  if (nparents == R_NilValue) {

    np = alloc1dreal(dims);
    for (i = 0; i < dims; i++)
      for (j = 0; j < dims; j++)
        np[j] = a[CMC(i, j, dims)];

  }/*THEN*/
  else {

    np = REAL(nparents);

  }/*ELSE*/

  /* flip all the nondiagonal cells. */
  for (j = 0; j < dims; j++) {

    for (i = 0; i < dims; i++) {

      /* diagonal elements are always equal to zero, skip them. */
      if (i == j)
        continue;

      a[CMC(i, j, dims)] = 1 - a[CMC(i, j, dims)];

    }/*FOR*/

  }/*FOR*/

  /* if an arc is present in the graph in one direction, you cannot add it in
   * the other direction (it would be a reversal); flip both in the adjacency
   * matrix. */
  for (j = 0; j < dims; j++)
    for (i = j + 1; i < dims; i++)
      a[CMC(j, i, dims)] = a[CMC(i, j, dims)] = a[CMC(i, j, dims)] * a[CMC(j, i, dims)];

  /* if a node has already reached its maximum number parents, do not add
   * more arcs pointing to that node. */
  for (j = 0; j < dims; j++)
    if (np[j] >= *mp)
      memset(a + j * dims, '\0', dims * sizeof(int));

#define FLIP_FROM_LIST(list, value) \
  if (!isNull(list)) { \
    if (!isInteger(list)) { \
      PROTECT(try = match(nodes, list, 0)); \
      coords = INTEGER(try); \
      narcs = length(try)/2; \
      for (i = 0; i < narcs; i++)  \
        a[CMC(coords[i] - 1, coords[i + narcs] - 1, dims)] = value; \
      UNPROTECT(1); \
    }/*THEN*/ \
    else { \
      coords = INTEGER(list); \
      for (i = 0; i < dims * dims; i ++) \
        if (coords[i] == 1) \
          a[i] = value; \
    }/*ELSE*/ \
  }/*THEN*/

  /* now the blacklist gets involved. */
  FLIP_FROM_LIST(blacklist, 0);
  /* and, last but not least, the whitelist gets involved. */
  FLIP_FROM_LIST(whitelist, 1);

  /* return either the adjacency matrix or the arc set. */
  if (isTRUE(convert)) {

    PROTECT(result2 = amat2arcs(result, nodes));

    if ((duplicated > 0) || !isInteger(arcs))
      UNPROTECT(2);
    else
      UNPROTECT(1);
    return result2;

  }/*THEN*/
  else {

    if ((duplicated > 0) || !isInteger(arcs))
      UNPROTECT(1);
    return result;

  }/*ELSE*/

}/*HC_TO_BE_ADDED*/