Esempio n. 1
0
// modificato il prototipo per aumentare l'efficienza
VETTOREi *sample1(VETTOREi *ris, int n, int k, int replace, double *p)
{
    int *x;
    int i, nc = 0;

    GetRNGstate();
    CREAv_i(ris, k);
    // g_x mi serve comunque (e sempre di dim. n)
    // OTTIMIZZATA da me!
    CREAv_i(g_x, n); // cosi` non ho problemi nel caso sia troppo piccolo
    x = g_x->dati;
    if (p != NULL) {
        FixupProb(p, n, k, replace);
        if (replace) {
            for (i = 0; i < n; i++) {
                if (n * p[i] > 0.1)
                    nc++;
            }
            if (nc > 200)
                walker_ProbSampleReplace(n, p, x, k, ris->dati);
            else
                ProbSampleReplace(n, p, x, k, ris->dati);
        }
        else
            ProbSampleNoReplace(n, p, x, k, ris->dati);
    }
    else {
        /* avoid allocation for a single sample */
        if (replace || k < 2)
            SampleReplace(k, n, ris->dati);
        else { // ho gia` allocato g_x
            SampleNoReplace(k, n, ris->dati, x);
        }
    }
    PutRNGstate();
    return ris;
}
Esempio n. 2
0
/* predict the value of a discrete node without parents. */
SEXP dpred(SEXP fitted, SEXP data, SEXP debug) {

int i = 0, nmax = 0, ndata = LENGTH(data), length = 0;
int *res = NULL, *debuglevel = LOGICAL(debug), *iscratch = NULL, *maxima = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP ptab, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  ptab = getListElement(fitted, "prob");
  length = LENGTH(ptab);
  prob = REAL(ptab);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(length);
  for (i = 0; i < length; i++)
    iscratch[i] = i + 1;

  /* create a scratch copy of the array. */
  buf = alloc1dreal(length);
  dscratch = alloc1dreal(length);
  memcpy(dscratch, prob, length * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(length);

  /* find out the mode(s). */
  all_max(dscratch, length, maxima, &nmax, iscratch, buf);

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  if (nmax == 1) {

    /* copy the index of the mode in the return value. */
    for (i = 0; i < ndata; i++)
      res[i] = maxima[0];

    if (*debuglevel > 0) {

      if (res[0] == NA_INTEGER)
        Rprintf("  > prediction for all observations is NA with probabilities:\n");
      else
        Rprintf("  > prediction for all observations is '%s' with probabilities:\n",
          CHAR(STRING_ELT(tr_levels, res[0] - 1)));

      Rprintf("  ");
      for (i = 0; i < LENGTH(ptab); i++)
        Rprintf("  %lf", prob[i]);
      Rprintf("\n");

    }/*THEN*/

  }/*THEN*/
  else {

    /* break ties: sample with replacement from all the maxima. */
    GetRNGstate();
    SampleReplace(ndata, nmax, res, maxima);
    PutRNGstate();

    if (*debuglevel > 0) {

      Rprintf("  > there are %d levels tied for prediction, applying tie breaking.\n", nmax);
      Rprintf("  > tied levels are:");
      for (i = 0; i < nmax; i++)
        Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[i] - 1)));
      Rprintf(".\n");

    }/*THEN*/

  }/*ELSE*/

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*DPRED*/
Esempio n. 3
0
/* predict the value of the training variable in a naive Bayes or Tree-Augmented
 * naive Bayes classifier. */
SEXP naivepred(SEXP fitted, SEXP data, SEXP parents, SEXP training, SEXP prior,
    SEXP prob, SEXP debug) {

int i = 0, j = 0, k = 0, n = 0, nvars = LENGTH(fitted), nmax = 0, tr_nlevels = 0;
int *res = NULL, **ex = NULL, *ex_nlevels = NULL;
int idx = 0, *tr_id = INTEGER(training);
int *iscratch = NULL, *maxima = NULL, *prn = NULL, *debuglevel = LOGICAL(debug);
int *include_prob = LOGICAL(prob);
double **cpt = NULL, *pr = NULL, *scratch = NULL, *buf = NULL, *pt = NULL;
double sum = 0;
SEXP class, temp, tr, tr_levels, result, nodes, probtab, dimnames;

  /* cache the node labels. */
  nodes = getAttrib(fitted, R_NamesSymbol);

  /* cache the pointers to all the variables. */
  ex = (int **) alloc1dpointer(nvars);
  ex_nlevels = alloc1dcont(nvars);

  for (i = 0; i < nvars; i++) {

    temp = VECTOR_ELT(data, i);
    ex[i] = INTEGER(temp);
    ex_nlevels[i] = NLEVELS(temp);

  }/*FOR*/

  /* get the training variable and its levels. */
  n = LENGTH(VECTOR_ELT(data, 0));
  tr = getListElement(VECTOR_ELT(fitted, *tr_id - 1), "prob");
  tr_levels = VECTOR_ELT(getAttrib(tr, R_DimNamesSymbol), 0);
  tr_nlevels = LENGTH(tr_levels);
  /* get the prior distribution. */
  pr = REAL(prior);

  if (*debuglevel > 0) {

    Rprintf("* the prior distribution for the target variable is:\n");
    PrintValue(prior);

  }/*THEN*/

  /* allocate the scratch space used to compute posterior probabilities. */
  scratch = alloc1dreal(tr_nlevels);
  buf = alloc1dreal(tr_nlevels);

  /* cache the pointers to the conditional probability tables. */
  cpt = (double **) alloc1dpointer(nvars);

  for (i = 0; i < nvars; i++) 
    cpt[i] = REAL(getListElement(VECTOR_ELT(fitted, i), "prob"));

  /* dereference the parents' vector. */
  prn = INTEGER(parents);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(tr_nlevels);

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(tr_nlevels);

  /* allocate the return value. */
  PROTECT(result = allocVector(INTSXP, n));
  res = INTEGER(result);

  /* allocate and initialize the table of the posterior probabilities. */
  if (*include_prob > 0) {

    PROTECT(probtab = allocMatrix(REALSXP, tr_nlevels, n));
    pt = REAL(probtab);
    memset(pt, '\0', n * tr_nlevels * sizeof(double));

  }/*THEN*/

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* for each observation... */
  for (i = 0; i < n; i++) {

    /* ... reset the scratch space and the indexes array... */
    for (k = 0; k < tr_nlevels; k++) {

      scratch[k] = log(pr[k]);
      iscratch[k] = k + 1;

    }/*FOR*/

    if (*debuglevel > 0)
      Rprintf("* predicting the value of observation %d.\n", i + 1);

    /* ... and for each conditional probability table... */
    for (j = 0; j < nvars; j++) {

      /* ... skip the training variable... */
      if (*tr_id == j + 1)
        continue;

      /* ... (this is the root node of the Chow-Liu tree) ... */
      if (prn[j] == NA_INTEGER) {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d) from the CPT (p = %lf).\n",
              NODE(j), CMC(ex[j][i] - 1, k, ex_nlevels[j]), ex[j][i], k + 1,
              cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][CMC(ex[j][i] - 1, k, ex_nlevels[j])]);

        }/*FOR*/

      }/*THEN*/
      else {

        /* ... and for each row of the conditional probability table... */
        for (k = 0; k < tr_nlevels; k++) {

          /* (the first dimension corresponds to the current node [X], the second
           * to the training node [Y], the third to the only parent of the current
           * node [Z]; CMC coordinates are computed as X + Y * NX + Z * NX * NY. */
          idx = (ex[j][i] - 1) + k * ex_nlevels[j] + 
                  (ex[prn[j] - 1][i] - 1) * ex_nlevels[j] * tr_nlevels;

          if (*debuglevel > 0) {

            Rprintf("  > node %s: picking cell %d (%d, %d, %d) from the CPT (p = %lf).\n",
              NODE(j), idx, ex[j][i], k + 1, ex[prn[j] - 1][i], cpt[j][idx]);

          }/*THEN*/

          /* ... update the posterior probability. */
          scratch[k] += log(cpt[j][idx]);

        }/*FOR*/

      }/*ELSE*/

    }/*FOR*/

    /* find out the mode(s). */
    all_max(scratch, tr_nlevels, maxima, &nmax, iscratch, buf);

    /* compute the posterior probabilities on the right scale, to attach them
     * to the return value. */
    if (*include_prob) {

      /* copy the log-probabilities from scratch. */
      memcpy(pt + i * tr_nlevels, scratch, tr_nlevels * sizeof(double));

      /* transform log-probabilitiees into plain probabilities. */
      for (k = 0, sum = 0; k < tr_nlevels; k++)
        sum += pt[i * tr_nlevels + k] = exp(pt[i * tr_nlevels + k] - scratch[maxima[0] - 1]);

      /* rescale them to sum up to 1. */
      for (k = 0; k < tr_nlevels; k++)
        pt[i * tr_nlevels + k] /= sum;

    }/*THEN*/

    if (nmax == 1) {

      res[i] = maxima[0];

      if (*debuglevel > 0) {

        Rprintf("  @ prediction for observation %d is '%s' with (log-)posterior:\n",
          i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax, res + i, maxima);

      if (*debuglevel > 0) {

        Rprintf("  @ there are %d levels tied for prediction of observation %d, applying tie breaking.\n", nmax, i + 1);

        Rprintf("  ");
        for (k = 0; k < tr_nlevels; k++)
          Rprintf("  %lf", scratch[k]);
        Rprintf("\n");

        Rprintf("  @ tied levels are:");
        for (k = 0; k < nmax; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[k] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* add back the attributes and the class to the return value. */
  PROTECT(class = allocVector(STRSXP, 1));
  SET_STRING_ELT(class, 0, mkChar("factor"));
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, class);

  if (*include_prob > 0) {

    /* set the levels of the taregt variable as rownames. */
    PROTECT(dimnames = allocVector(VECSXP, 2));
    SET_VECTOR_ELT(dimnames, 0, tr_levels);
    setAttrib(probtab, R_DimNamesSymbol, dimnames);
    /* add the posterior probabilities to the return value. */
    setAttrib(result, install("prob"), probtab);

    UNPROTECT(4);

  }/*THEN*/
  else {

    UNPROTECT(2);

  }/*ELSE*/

  return result;

}/*NAIVEPRED*/
Esempio n. 4
0
/* predict the value of a discrete node with one or more parents. */
SEXP cdpred(SEXP fitted, SEXP data, SEXP parents, SEXP debug) {

int i = 0, k = 0, ndata = LENGTH(data), nrows = 0, ncols = 0;
int *configs = INTEGER(parents), *debuglevel = LOGICAL(debug);
int *iscratch = NULL, *maxima = NULL, *nmax = NULL, *res = NULL;
double *prob = NULL, *dscratch = NULL, *buf = NULL;
SEXP temp, result, tr_levels = getAttrib(data, R_LevelsSymbol);

  /* get the probabilities of the multinomial distribution. */
  temp = getListElement(fitted, "prob");
  nrows = INT(getAttrib(temp, R_DimSymbol));
  ncols = LENGTH(temp) / nrows;
  prob = REAL(temp);

  /* create the vector of indexes. */
  iscratch = alloc1dcont(nrows);

  /* create a scratch copy of the array. */
  buf = alloc1dreal(nrows);
  dscratch = alloc1dreal(nrows * ncols);
  memcpy(dscratch, prob, nrows * ncols * sizeof(double));

  /* allocate the array for the indexes of the maxima. */
  maxima = alloc1dcont(nrows * ncols);

  /* allocate the maxima counters. */
  nmax = alloc1dcont(ncols);

  /* get the mode for each configuration. */
  for (i = 0; i < ncols; i++) {

    /* initialize the vector of indexes. */
    for (k = 0; k < nrows; k++)
      iscratch[k] = k + 1;

    /* find out the mode(s). */
    all_max(dscratch + i * nrows, nrows, maxima + i * nrows,
      nmax + i, iscratch, buf);

  }/*FOR*/

  /* allocate and initialize the return value. */
  PROTECT(result = allocVector(INTSXP, ndata));
  res = INTEGER(result);

  /* initialize the random seed, just in case we need it for tie breaking. */
  GetRNGstate();

  /* copy the index of the mode in the return value. */
  for (i = 0; i < ndata; i++) {

    if (nmax[configs[i] - 1] == 1) {

      res[i] = maxima[CMC(0, configs[i] - 1, nrows)];

      if (*debuglevel > 0) {

        if (res[i] == NA_INTEGER)
          Rprintf("  > prediction for observation %d is NA with probabilities:\n");
        else
          Rprintf("  > prediction for observation %d is '%s' with probabilities:\n",
            i + 1, CHAR(STRING_ELT(tr_levels, res[i] - 1)));

        Rprintf("  ");
        for (int k = 0; k < nrows; k++)
          Rprintf("  %lf", (prob + nrows * (configs[i] - 1))[k]);
        Rprintf("\n");

      }/*THEN*/

    }/*THEN*/
    else {

      /* break ties: sample with replacement from all the maxima. */
      SampleReplace(1, nmax[configs[i] - 1], res + i, maxima + (configs[i] - 1) * nrows);

      if (*debuglevel > 0) {

        Rprintf("  > there are %d levels tied for prediction of observation %d, applying tie breaking.\n",
          nmax[configs[i] - 1], i + 1);
        Rprintf("  > tied levels are:");
        for (k = 0; k < nmax[configs[i] - 1]; k++)
          Rprintf(" %s", CHAR(STRING_ELT(tr_levels, maxima[CMC(k, configs[i] - 1, nrows)] - 1)));
        Rprintf(".\n");

      }/*THEN*/

    }/*ELSE*/

  }/*FOR*/

  /* save the state of the random number generator. */
  PutRNGstate();

  /* copy the labels and the class from the input data. */
  setAttrib(result, R_LevelsSymbol, tr_levels);
  setAttrib(result, R_ClassSymbol, getAttrib(data, R_ClassSymbol));

  UNPROTECT(1);

  return result;

}/*CDPRED*/