Пример #1
0
/* predict the value of a gaussian node with one or more parents. */
SEXP cgpred(SEXP fitted, SEXP data, SEXP debug)  {

int i = 0, j = 0, ndata = LENGTH(VECTOR_ELT(data, 0)), ncols = LENGTH(data);
int *debuglevel = LOGICAL(debug);
double *res = NULL, *coefs = NULL;
double **columns = NULL;
SEXP result;

  /* get the coefficient of the linear regression. */
  coefs = REAL(getListElement(fitted, "coefficients"));

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(REALSXP, ndata));
  res = REAL(result);

  /* dereference the columns of the data frame. */
  columns = (double **) alloc1dpointer(ncols);
  for (i = 0; i < ncols; i++)
    columns[i] = REAL(VECTOR_ELT(data, i));

  for (i = 0; i < ndata; i++) {

    /* compute the mean value for this observation. */
    res[i] = coefs[0];

    for (j = 0; j < ncols; j++)
      res[i] += columns[j][i] * coefs[j + 1];

    if (*debuglevel > 0) {

      Rprintf("  > prediction for observation %d is %lf with predictor:\n",
        i + 1, res[i]);

      Rprintf("    (%lf) + (%lf) * (%lf)", coefs[0], columns[0][i], coefs[1]);
      for (j = 1; j < ncols; j++)
        Rprintf(" + (%lf) * (%lf)", columns[j][i], coefs[j + 1]);
      Rprintf("\n");

    }/*THEN*/

  }/*FOR*/

  UNPROTECT(1);

  return result;

}/*CGPRED*/
Пример #2
0
/* predict the value of the training variable in a naive Bayes or Tree-Augmented
 * naive Bayes classifier. */
SEXP naivepred(SEXP fitted, SEXP data, SEXP parents, SEXP training, SEXP prior,
    SEXP prob, SEXP debug) {

int i = 0, j = 0, k = 0, n = 0, nvars = LENGTH(fitted), nmax = 0, tr_nlevels = 0;
int *res = NULL, **ex = NULL, *ex_nlevels = NULL;
int idx = 0, *tr_id = INTEGER(training);
int *iscratch = NULL, *maxima = NULL, *prn = NULL, *debuglevel = LOGICAL(debug);
int *include_prob = LOGICAL(prob);
double **cpt = NULL, *pr = NULL, *scratch = NULL, *buf = NULL, *pt = NULL;
double sum = 0;
SEXP class, temp, tr, tr_levels, result, nodes, probtab, dimnames;

  /* cache the node labels. */
  nodes = getAttrib(fitted, R_NamesSymbol);

  /* cache the pointers to all the variables. */
  ex = (int **) alloc1dpointer(nvars);
  ex_nlevels = alloc1dcont(nvars);

  for (i = 0; i < nvars; i++) {

    temp = VECTOR_ELT(data, i);
    ex[i] = INTEGER(temp);
    ex_nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  /* get the training variable and its levels. */
  n = LENGTH(VECTOR_ELT(data, 0));
  tr = getListElement(VECTOR_ELT(fitted, *tr_id - 1), "prob");
  tr_levels = VECTOR_ELT(getAttrib(tr, R_DimNamesSymbol), 0);
  tr_nlevels = LENGTH(tr_levels);
  /* get the prior distribution. */
  pr = REAL(prior);

  if (*debuglevel > 0) {

    Rprintf("* the prior distribution for the target variable is:\n");
    PrintValue(prior);

  }/*THEN*/

  /* allocate the scratch space used to compute posterior probabilities. */
  scratch = alloc1dreal(tr_nlevels);
  buf = alloc1dreal(tr_nlevels);

  /* cache the pointers to the conditional probability tables. */
  cpt = (double **) alloc1dpointer(nvars);

  for (i = 0; i < nvars; i++) 
    cpt[i] = REAL(getListElement(VECTOR_ELT(fitted, i), "prob"));

  /* dereference the parents' vector. */
  prn = INTEGER(parents);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(tr_nlevels);

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(tr_nlevels);

  /* allocate the return value. */
  PROTECT(result = allocVector(INTSXP, n));
  res = INTEGER(result);

  /* allocate and initialize the table of the posterior probabilities. */
  if (*include_prob > 0) {

    PROTECT(probtab = allocMatrix(REALSXP, tr_nlevels, n));
    pt = REAL(probtab);
    memset(pt, '\0', n * tr_nlevels * sizeof(double));

  }/*THEN*/

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* for each observation... */
  for (i = 0; i < n; i++) {

    /* ... reset the scratch space and the indexes array... */
    for (k = 0; k < tr_nlevels; k++) {

      scratch[k] = log(pr[k]);
      iscratch[k] = k + 1;

    }/*FOR*/

    if (*debuglevel > 0)
      Rprintf("* predicting the value of observation %d.\n", i + 1);

    /* ... and for each conditional probability table... */
    for (j = 0; j < nvars; j++) {

      /* ... skip the training variable... */
      if (*tr_id == j + 1)
        continue;

      /* ... (this is the root node of the Chow-Liu tree) ... */
      if (prn[j] == NA_INTEGER) {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d) from the CPT (p = %lf).\n",
              NODE(j), CMC(ex[j][i] - 1, k, ex_nlevels[j]), ex[j][i], k + 1,
              cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

        }/*FOR*/

      }/*THEN*/
      else {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          /* (the first dimension corresponds to the current node [X], the second
           * to the training node [Y], the third to the only parent of the current
           * node [Z]; CMC coordinates are computed as X + Y * NX + Z * NX * NY. */
          idx = (ex[j][i] - 1) + k * ex_nlevels[j] + 
                  (ex[prn[j] - 1][i] - 1) * ex_nlevels[j] * tr_nlevels;

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d, %d) from the CPT (p = %lf).\n",
              NODE(j), idx, ex[j][i], k + 1, ex[prn[j] - 1][i], cpt[j][idx]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][idx]);

        }/*FOR*/

      }/*ELSE*/

    }/*FOR*/

    /* find out the mode(s). */
    all_max(scratch, tr_nlevels, maxima, &nmax, iscratch, buf);

    /* compute the posterior probabilities on the right scale, to attach them
     * to the return value. */
    if (*include_prob) {

      /* copy the log-probabilities from scratch. */
      memcpy(pt + i * tr_nlevels, scratch, tr_nlevels * sizeof(double));

      /* transform log-probabilitiees into plain probabilities. */
      for (k = 0, sum = 0; k < tr_nlevels; k++)
        sum += pt[i * tr_nlevels + k] = exp(pt[i * tr_nlevels + k] - scratch[maxima[0] - 1]);

      /* rescale them to sum up to 1. */
      for (k = 0; k < tr_nlevels; k++)
        pt[i * tr_nlevels + k] /= sum;

    }/*THEN*/

    if (nmax == 1) {

      res[i] = maxima[0];

      if (*debuglevel > 0) {

        Rprintf("  @ prediction for observation %d is '%s' with (log-)posterior:\n",
          i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax, res + i, maxima);

      if (*debuglevel > 0) {

        Rprintf("  @ there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax, i + 1);

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

        Rprintf("  @ tied levels are:");
        for (k = 0; k < nmax; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[k] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* add back the attributes and the class to the return value. */
  PROTECT(class = allocVector(STRSXP, 1));
  SET_STRING_ELT(class, 0, mkChar("factor"));
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, class);

  if (*include_prob > 0) {

    /* set the levels of the taregt variable as rownames. */
    PROTECT(dimnames = allocVector(VECSXP, 2));
    SET_VECTOR_ELT(dimnames, 0, tr_levels);
    setAttrib(probtab, R_DimNamesSymbol, dimnames);
    /* add the posterior probabilities to the return value. */
    setAttrib(result, install("prob"), probtab);

    UNPROTECT(4);

  }/*THEN*/
  else {

    UNPROTECT(2);

  }/*ELSE*/

  return result;

}/*NAIVEPRED*/
Пример #3
0
/* Shrinked Covariance Matrix. */
SEXP cov_lambda(SEXP data, SEXP length) {

int i = 0, j = 0, k = 0, cur = 0;
int *n = INTEGER(length), ncols = LENGTH(data);
double *mean = NULL, *var = NULL, **column = NULL;
double lambda = 0, sumcors = 0, sumvars = 0;
SEXP res;

  /* allocate the covariance matrix. */
  PROTECT(res = allocMatrix(REALSXP, ncols, ncols));
  var = REAL(res);
  memset(var, '\0', ncols * ncols * sizeof(double));

  /* allocate an array to store the mean values. */
  mean = alloc1dreal(ncols);

  /* allocate and initialize an array of pointers for the variables. */
  column = (double **) alloc1dpointer(ncols);
  for (i = 0; i < ncols; i++)
    column[i] = REAL(VECTOR_ELT(data, i));

  /* compute the mean values  */
  for (i = 0; i < ncols; i++) {

    for (j = 0 ; j < *n; j++)
      mean[i] += column[i][j];

    mean[i] /= (*n);

  }/*FOR*/

  for (i = 0; i < ncols; i++) {

    for (j = i; j < ncols; j++) {

      cur = CMC(i, j, ncols);

      /* compute the actual variance/covariance. */
      for (k = 0; k < *n; k++)
        var[cur] += (column[i][k] - mean[i]) * (column[j][k] - mean[j]);

      if (i != j) {

        /* do the first round of computations for the shrinkage intensity. */
        for (k = 0; k < *n; k++) {

          sumvars +=
            ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n)) *
            ((column[i][k] - mean[i]) * (column[j][k] - mean[j]) - var[cur] / (*n));

        }/*FOR*/

        sumcors += (var[cur] / (*n - 1)) * (var[cur] / (*n - 1));

      }/*THEN*/

      /* use the unbiased estimator for variances/covariances. */
      var[cur] /= (*n) - 1;

      /* fill in the symmetric element of the matrix. */
      var[CMC(j, i, ncols)] = var[cur];

    }/*FOR*/

  }/*FOR*/

  /* wrap up the computation of the shrinkage intensity. */
  lambda = sumvars * (*n) / (*n - 1) / (*n -1) / (*n -1) / sumcors;

  /* truncate the shrinkage intensity in the [0,1] interval; this is not an
   * error, but a measure to increase the quality of the shrinked estimate. */
  if (lambda > 1) {

    lambda = 1;

  }/*THEN*/
  else if (lambda < 0) {

    lambda = 0;

  }/*THEN*/

  /* shrink the covariance matrix (except the diagonal, which stays the same). */
  for (i = 0; i < ncols; i++)
    for (j = 0; j < ncols; j++)
      if (i != j)
        var[CMC(i, j, ncols)] *= 1 - lambda;

  UNPROTECT(1);
  return res;

}/*COV_LAMBDA*/
Пример #4
0
/* predict the values of one or more variables given one or more variables by
 * maximum a posteriori (MAP). */
SEXP mappred(SEXP node, SEXP fitted, SEXP data, SEXP n, SEXP from, SEXP debug) {

int i = 0, j = 0, k = 0, nobs = 0, nev = 0, nlvls = 0;
int *vartypes = NULL, nsims = INT(n), debuglevel = isTRUE(debug);
void **varptrs = NULL, **evptrs = NULL, *pred = NULL, *res = NULL;
SEXP result, colnames, evidence, evmatch, temp = R_NilValue;
SEXP cpdist, predicted, lvls = R_NilValue;
double *wgt = NULL;
long double *lvls_counts = NULL;

  /* extract the names of the variables in the data. */
  colnames = getAttrib(data, R_NamesSymbol);

  /* remove the name of the variable to predict. */
  nev = length(from);
  PROTECT(evmatch = match(colnames, from, 0));

  /* cache variable types and pointers. */
  vartypes = alloc1dcont(nev);
  varptrs = alloc1dpointer(nev);
  for (j = 0, k = 0; j < nev; j++) {

    temp = VECTOR_ELT(data, INTEGER(evmatch)[j] - 1);
    vartypes[k] = TYPEOF(temp);
    varptrs[k++] = DATAPTR(temp);

  }/*FOR*/

  /* cache the sample size. */
  nobs = length(temp);

  /* allocate a list to hold the evidence. */
  PROTECT(evidence = allocVector(VECSXP, nev));
  setAttrib(evidence, R_NamesSymbol, from);

  /* cache pointers to the elements of the evidence .*/
  evptrs = alloc1dpointer(nev);

  for (j = 0; j < nev; j++) {

    PROTECT(temp = allocVector(vartypes[j], 1));
    evptrs[j] = DATAPTR(temp);
    SET_VECTOR_ELT(evidence, j, temp);
    UNPROTECT(1);

  }/*FOR*/

  /* make the evidence a data frame to compact debugging output. */
  minimal_data_frame(evidence);

  /* allocate the return value. */
  PROTECT(result = fitnode2df(fitted, STRING_ELT(node, 0), nobs));
  res = DATAPTR(result);

  /* in the case of discrete variables, allocate scratch space for levels'
   * frequencies. */
  if (TYPEOF(result) == INTSXP) {

    lvls = getAttrib(result, R_LevelsSymbol);
    nlvls = length(lvls);
    lvls_counts = allocldouble(nlvls);

  }/*THEN*/

  /* allocate the weights. */
  wgt = alloc1dreal(nsims);

  /* allocate sratch space for the random samplings. */
  PROTECT(cpdist = fit2df(fitted, nsims));
  predicted = getListElement(cpdist, (char *)CHAR(STRING_ELT(node, 0)));
  pred = DATAPTR(predicted);

  /* iterate over the observations. */
  for (i = 0; i < nobs; i++) {

    /* copy the values into the list. */
    for (j = 0; j < nev; j++) {

      switch(vartypes[j]) {

        case REALSXP:

          *((double *)evptrs[j]) = ((double *)varptrs[j])[i];
          break;

        case INTSXP:

          *((int *)evptrs[j]) = ((int *)varptrs[j])[i];
          break;

      }/*SWITCH*/

    }/*FOR*/

    if (debuglevel > 0) {

      Rprintf("* predicting observation %d conditional on:\n", i);
      PrintValue(evidence);

    }/*THEN*/

    /* generate samples from the conditional posterior distribution. */
    c_rbn_master(fitted, cpdist, n, evidence, FALSE);
    /* compute the weights. */
    c_lw_weights(fitted, cpdist, nsims, wgt, from, FALSE);

    /* compute the posterior estimate. */
    switch(TYPEOF(predicted)) {

      case REALSXP:

        /* average the predicted values. */
        ((double *)res)[i] = posterior_mean((double *)pred, wgt, nsims,
                               debuglevel);
        break;

      case INTSXP:

        /* pick the most frequent value. */
        ((int *)res)[i] = posterior_mode((int *)pred, wgt, nsims, lvls_counts,
                            lvls, nlvls, debuglevel);
        break;

    }/*SWITCH*/

  }/*FOR*/

  UNPROTECT(4);

  return result;

}/*MAPPRED*/
Пример #5
0
/* conditional Monte Carlo simulation for correlation-based tests. */
SEXP gauss_cmcarlo(SEXP data, SEXP length, SEXP samples, SEXP test, SEXP alpha) {

int j = 0, k = 0, ncols = LENGTH(data), errcode = 0, *work = NULL, *perm = NULL;
int error_counter = 0, *B = INTEGER(samples), *num = INTEGER(length);
double observed = 0, permuted = 0, *yperm = NULL, *yorig = NULL, *res = NULL;
double enough = ceil(NUM(alpha) * (*B)) + 1;
double **column = NULL, *mean = NULL, *covariance = NULL, *covariance_backup = NULL;
double *u = NULL, *d = NULL, *vt = NULL;
SEXP result;

  /* allocate the matrices needed for the SVD decomposition. */
  u = alloc1dreal(ncols * ncols);
  d = alloc1dreal(ncols);
  vt = alloc1dreal(ncols * ncols);

  /* allocate and initialize the result. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* allocate and initialize an array of pointers for the variables. */
  column = (double **) alloc1dpointer(ncols);
  for (j = 0; j < ncols; j++)
    column[j] = REAL(VECTOR_ELT(data, j));

  /* cache the means of the variables (they are invariant under permutation). */
  mean = alloc1dreal(ncols);

  /* compute the mean values  */
  for (j = 0; j < ncols; j++) {

    for (k = 0 ; k < *num; k++)
      mean[j] += column[j][k];

    mean[j] /= (*num);

  }/*FOR*/

  /* allocate and initialize the covariance matrix. */
  covariance = alloc1dreal(ncols * ncols);
  covariance_backup = alloc1dreal(ncols * ncols);
  c_covmat(column, mean, &ncols, num, covariance);
  memcpy(covariance_backup, covariance, ncols * ncols * sizeof(double));

  /* substitute the original data with the fake column that will be permuted. */
  yperm = alloc1dreal(*num);
  yorig = column[1];
  memcpy(yperm, yorig, *num * sizeof(double));
  column[1] = yperm;

   /* allocate the arrays needed by RandomPermutation. */
  perm = alloc1dcont(*num);
  work = alloc1dcont(*num);

  /* initialize the random number generator. */
  GetRNGstate();

  /* pick up the observed value of the test statistic, then generate a set of
     random permutations (all variable but the second are fixed) and check how
     many tests are greater (in absolute value) than the original one.*/
  switch(INT(test)) {

    case GAUSSIAN_MUTUAL_INFORMATION:
    case LINEAR_CORRELATION:
    case FISHER_Z:
      observed = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

      if (errcode)
        error("an error (%d) occurred in the call to dgesvd().\n", errcode);

      for (j = 0; j < (*B); j++) {

        /* reset the error flag of the SVD Fortran routine. */
        errcode = 0;

        RandomPermutation(*num, perm, work);

        for (k = 0; k < *num; k++)
          yperm[k] = yorig[perm[k]];

        /* restore the covariance matrix from the good copy. */
        memcpy(covariance, covariance_backup, ncols * ncols * sizeof(double));
        /* update the relevant covariances. */
        c_update_covmat(column, mean, 1, &ncols, num, covariance);

        permuted = c_fast_pcor(covariance, &ncols, u, d, vt, &errcode);

        if (errcode != 0)
          error_counter++;

        if (fabs(permuted) > fabs(observed)) {

          sequential_counter_check(*res);

        }/*THEN*/

      }/*FOR*/

    if (error_counter > 0)
      warning("unable to compute %d permutations due to errors in dgesvd().\n",
        error_counter);

    break;

  }/*SWITCH*/

  PutRNGstate();

  /* save the observed p-value. */
  *res /= *B;

  UNPROTECT(1);

  return result;

}/*GAUSS_CMCARLO*/
Пример #6
0
SEXP cwpost(SEXP x, SEXP z, SEXP imaginary, SEXP phi_coef) {

int i = 0, j = 0, k = 0;
int ncols = LENGTH(z), num = LENGTH(x), tau_ncols = LENGTH(z) + 1;
int *iss = INTEGER(imaginary), rho = *iss + ncols;
double logscale = 0, logk = 0, xprod = 0, var_x = 0, zi_mu = 0, phi = 0;
double *xx = REAL(x), *phic = REAL(phi_coef), *workspace = NULL;
double *res = NULL, **zz = NULL, *zi = NULL, *mu = NULL, *delta_mu = NULL;
double *tau = NULL, *invtau = NULL, *old_tau = NULL, *old_mu = NULL;
SEXP result;

  /* allocate a workspace vector. */
  workspace = alloc1dreal(tau_ncols);  

  /* allocate and initialize the parent configuration. */
  zi = alloc1dreal(ncols + 1);
  zi[0] = 1;

  /* estimate mu and var_x. */
  mu = alloc1dreal(tau_ncols);
  old_mu = alloc1dreal(tau_ncols);
  delta_mu = alloc1dreal(tau_ncols);

  for (i = 0; i < num; i++)
    mu[0] += xx[i];
  mu[0] /= num;

  for (i = 0; i < num; i++)
    var_x += (xx[i] - mu[0]) * (xx[i] - mu[0]);
  var_x /= num - 1;

  /* initialize phi. */
  phi = var_x * (*phic);

  /* allocate and initialize an array of pointers for the variables. */
  zz = (double **) alloc1dpointer(ncols);
  for (j = 0; j < ncols; j++)
    zz[j] = REAL(VECTOR_ELT(z, j));

  /* allocate and initialize tau. */
  tau = alloc1dreal(tau_ncols * tau_ncols);  
  old_tau = alloc1dreal(tau_ncols * tau_ncols);  
  invtau = alloc1dreal(tau_ncols * tau_ncols);  
  build_tau(zz, tau, &ncols, &num, iss, phic);
  memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double));
  c_ginv(tau, &tau_ncols, invtau);

  /* allocate and initialize result to zero. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;

  /* for each sample... */
  for (i = 0; i < num; i++) {

    /* ... extract the values of the parents ... */
    for (j = 0; j < ncols; j++)
      zi[j + 1] = zz[j][i];

    /* ... compute the Mahalanobis distance of z[i] ... */
    xprod = c_quadratic(zi, &tau_ncols, invtau, zi, workspace);

    /* ... compute the scale factor ... */
    logscale = log(phi) + log1p(xprod);
    logk = lgammafn(0.5 * (1 + rho)) - lgammafn(0.5 * rho);
    logk -= 0.5 * (logscale + log(M_PI));

    /* and then the score for the variable. */
    for (j = 0, zi_mu = 0; j < tau_ncols; j++)
      zi_mu += zi[j] * mu[j];

    *res += logk - 0.5 * (1 + rho) * 
             log1p((xx[i] - zi_mu) * (xx[i] - zi_mu) / exp(logscale));

    /* For the next iteration, update the tau matrix ... */
    memcpy(old_tau, tau, tau_ncols * tau_ncols * sizeof(double));

    for (j = 0; j < tau_ncols; j++)
      for (k = j; k < tau_ncols; k++)
        tau[CMC(j, k, tau_ncols)] = tau[CMC(k, j, tau_ncols)] = 
          tau[CMC(j, k, tau_ncols)] + zi[j] * zi[k];

    /* ... its inverse  ... */
    c_finv(tau, &tau_ncols, invtau);

    /* ... update the mu vector ... */
    memcpy(old_mu, mu, tau_ncols * sizeof(double));
    c_rotate(invtau, old_tau, mu, &(xx[i]), zi, &tau_ncols, workspace);

    /* ... update rho (ISS + sample size evaluated at the current iteration) ... */
    rho++;

    /* ... and update phi. */
    for (j = 0; j < tau_ncols; j++)
      delta_mu[j] = old_mu[j] - mu[j];
    for (j = 0, zi_mu = 0; j < tau_ncols; j++)
      zi_mu += zi[j] * mu[j];

    phi += (xx[i] - zi_mu) * xx[i] +
             c_quadratic(delta_mu, &tau_ncols, old_tau, old_mu, workspace); 

  }/*FOR*/

  UNPROTECT(1);
  return result;

}/*CWPOST*/
Пример #7
0
SEXP entropy_loss(SEXP fitted, SEXP orig_data, SEXP by_sample, SEXP keep,
    SEXP debug) {

int i = 0, k = 0, ndata = 0, nnodes = LENGTH(fitted), nlevels = 0, type = 0;
int *configs = NULL, *debuglevel = LOGICAL(debug), *by = LOGICAL(by_sample);
int *to_keep = NULL;
double *res = 0, *res_sample = NULL, **columns = 0, cur_loss = 0;
const char *class = NULL;
SEXP data, cur_node, nodes, result, result_sample, coefs, sd, parents, try;

  /* get the node labels. */
  nodes = getAttrib(fitted, R_NamesSymbol);
  /* rearrange the columns of the data to match the network. */
  PROTECT(data = c_dataframe_column(orig_data, nodes, FALSE, TRUE));
  /* get the sample size. */
  ndata = LENGTH(VECTOR_ELT(data, 0));
  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(REALSXP, 1));
  res = REAL(result);
  *res = 0;
  /* allocate the sample's contributions if needed. */
  if (*by > 0) {

    PROTECT(result_sample = allocVector(REALSXP, ndata));
    res_sample = REAL(result_sample);
    memset(res_sample, '\0', ndata * sizeof(double));

  }/*THEN*/

  /* find out which nodes to use in computing the entropy loss. */
  PROTECT(try = match(nodes, keep, 0));
  to_keep = INTEGER(try);
  R_isort(to_keep, LENGTH(try));

  /* determine the class of the fitted network. */
  class = CHAR(STRING_ELT(getAttrib(VECTOR_ELT(fitted, 0), R_ClassSymbol), 0));

  if (strcmp(class, "bn.fit.gnode") == 0) {

    /* dereference the data set's columns. */
    columns = (double **) alloc1dpointer(nnodes);
    for (i = 0; i < nnodes; i++)
      columns[i] = REAL(VECTOR_ELT(data, i));

    type = GAUSSIAN;

  }/*THEN*/
  else if ((strcmp(class, "bn.fit.dnode") == 0) || (strcmp(class, "bn.fit.onode") == 0)) {

    /* allocate an array for parents' configurations. */
    configs = alloc1dcont(ndata);

    type = DISCRETE;

  }/*THEN*/

  /* iterate over the nodes. */
  for (i = 0; i < nnodes; i++) {

    if (i == to_keep[k] - 1) {

      k++;

    }/*THEN*/
    else {

      if (*debuglevel > 0)
        Rprintf("  > skipping node %s.\n", NODE(i));

      continue;

    }/*ELSE*/

    /* get the current node. */
    cur_node = VECTOR_ELT(fitted, i);
    /* get the parents of the node. */
    parents = getListElement(cur_node, "parents");
    /* get the parameters (regression coefficients and residuals' standard
     * deviation for Gaussian nodes, conditional probabilities for discrete
     * nodes), and compute the loss. */
    switch(type)  {

      case GAUSSIAN:

        coefs = getListElement(cur_node, "coefficients");
        sd = getListElement(cur_node, "sd");

        cur_loss = c_gloss(&i, parents, REAL(coefs), REAL(sd), columns, nodes,
                     ndata, res_sample);
        break;

      case DISCRETE:

        coefs = getListElement(cur_node, "prob");
        nlevels = INT(getAttrib(coefs, R_DimSymbol));

        cur_loss = c_dloss(&i, parents, configs, REAL(coefs), data, nodes,
                     ndata, nlevels, res_sample);
        break;

    }/*SWITCH*/

    if (*debuglevel > 0)
      Rprintf("  > log-likelihood loss for node %s is %lf.\n", NODE(i), cur_loss);

    /* add the node contribution to the return value. */
    *res += cur_loss;

  }/*FOR*/

  if (*by > 0) {

    UNPROTECT(4);
    return result_sample;

  }/*THEN*/
  else {

    UNPROTECT(3);
    return result;

  }/*ELSE*/

}/*ENTROPY_LOSS*/