Exemplo n.º 1
0
SEXP smart_network_averaging(SEXP arcs, SEXP nodes, SEXP weights) {

int k = 0, from = 0, to = 0, nrows = length(arcs) / 2, dims = length(nodes);
int *a = NULL, *coords = NULL, *poset = NULL, *path = NULL, *scratch = NULL;
double *w = NULL;
SEXP weights2, amat, try, acyclic;

  /* allocate and initialize the adjacency matrix. */
  PROTECT(amat = allocMatrix(INTSXP, dims, dims));
  a = INTEGER(amat);
  memset(a, '\0', sizeof(int) * dims * dims);

  /* match the node labels in the arc set. */
  PROTECT(try = match(nodes, arcs, 0));
  coords = INTEGER(try);

  /* duplicate the weights to preserve the original ones. */
  PROTECT(weights2 = duplicate(weights));
  w = REAL(weights2);

  /* sort the strength coefficients. */
  poset = Calloc1D(nrows, sizeof(int));
  for (k = 0; k < nrows; k++)
    poset[k] = k;
  R_qsort_I(w, poset, 1, nrows);

  /* allocate buffers for c_has_path(). */
  path = Calloc1D(dims, sizeof(int));
  scratch = Calloc1D(dims, sizeof(int));

  /* iterate over the arcs in reverse order wrt their strength coefficients. */
  for (k = 0; k < nrows; k++) {

    from = coords[poset[k]] - 1;
    to = coords[poset[k] + nrows] - 1;

    /* add an arc only if it does not introduce cycles. */
    if (!c_has_path(to, from, a, dims, nodes, FALSE, TRUE, path, scratch, FALSE))
      a[CMC(from, to, dims)] = 1;
    else
      warning("arc %s -> %s would introduce cycles in the graph, ignoring.",
        NODE(from), NODE(to));

  }/*FOR*/

  /* convert the adjacency matrix back to an arc set and return it. */
  acyclic = amat2arcs(amat, nodes);

  Free1D(path);
  Free1D(scratch);
  Free1D(poset);

  UNPROTECT(3);

  return acyclic;

}/*SMART_NETWORK_AVERAGING*/
Exemplo n.º 2
0
/* R function  qsort(x, index.return) */
SEXP attribute_hidden do_qsort(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP x, sx;
    int indx_ret, n;
    double *vx = NULL;
    int *ivx = NULL;
    Rboolean x_real, x_int;

    checkArity(op, args);
    x = CAR(args);
    if (!isNumeric(x))
	error(_("argument is not a numeric vector"));
    x_real= TYPEOF(x) == REALSXP;
    x_int = !x_real && (TYPEOF(x) == INTSXP || TYPEOF(x) == LGLSXP);
    PROTECT(sx = (x_real || x_int) ? duplicate(x) : coerceVector(x, REALSXP));
    SET_ATTRIB(sx, R_NilValue);
    SET_OBJECT(sx, 0);
    /* if x has names, drop them, since they won't be ordered
       if (!isNull(getAttrib(sx, R_NamesSymbol)))
	   setAttrib(sx, R_NamesSymbol, R_NilValue); */
    indx_ret = asLogical(CADR(args));
    n = LENGTH(x);
    if(x_int) ivx = INTEGER(sx); else vx = REAL(sx);
    if(indx_ret) {
	SEXP ans, ansnames, indx;
	int i, *ix;
	/* answer will have x = sorted x , ix = index :*/
	PROTECT(ans      = allocVector(VECSXP, 2));
	PROTECT(ansnames = allocVector(STRSXP, 2));
	PROTECT(indx = allocVector(INTSXP, n));
	ix = INTEGER(indx);
	for(i = 0; i < n; i++)
	    ix[i] = i+1;

	if(x_int)
	    R_qsort_int_I(ivx, ix, 1, n);
	else
	    R_qsort_I(vx, ix, 1, n);

	SET_VECTOR_ELT(ans, 0, sx);
	SET_VECTOR_ELT(ans, 1, indx);
	SET_STRING_ELT(ansnames, 0, mkChar("x"));
	SET_STRING_ELT(ansnames, 1, mkChar("ix"));
	setAttrib(ans, R_NamesSymbol, ansnames);
	UNPROTECT(4);
	return ans;
    }
    else {
	if(x_int)
	    R_qsort_int(ivx, 1, n);
	else
	    R_qsort(vx, 1, n);

	UNPROTECT(1);
	return sx;
    }
}
Exemplo n.º 3
0
void makeA(double *x, const int mdim, const int nsample, int *cat, int *a, 
           int *b) {
    /* makeA() constructs the mdim by nsample integer array a.  For each 
       numerical variable with values x(m, n), n=1, ...,nsample, the x-values 
       are sorted from lowest to highest.  Denote these by xs(m, n).  Then 
       a(m,n) is the case number in which xs(m, n) occurs. The b matrix is 
       also contructed here.  If the mth variable is categorical, then 
       a(m, n) is the category of the nth case number. */
    int i, j, n1, n2;
    double *v= (double *) calloc(nsample, sizeof(double));
    int *index = (int *) calloc(nsample, sizeof(int));

    for (i = 0; i < mdim; ++i) {
        if (cat[i] == 1) { /* numerical predictor */
            for (j = 0; j < nsample; ++j) {
                v[j] = x[i + j * mdim];
                index[j] = j + 1;
            }
            R_qsort_I(v, index, 1, nsample);

            /*  this sorts the v(n) in ascending order. index(n) is the case 
                number of that v(n) nth from the lowest (assume the original 
                case numbers are 1,2,...).  */
            for (j = 0; j < nsample-1; ++j) {
                n1 = index[j];
                n2 = index[j + 1];
                a[i + j * mdim] = n1;
                if (j == 0) b[i + (n1-1) * mdim] = 1;
                b[i + (n2-1) * mdim] =  (v[j] < v[j + 1]) ?
                    b[i + (n1-1) * mdim] + 1 : b[i + (n1-1) * mdim];
            }
            a[i + (nsample-1) * mdim] = index[nsample-1];
        } else { /* categorical predictor */
            for (j = 0; j < nsample; ++j) 
                a[i + j*mdim] = (int) x[i + j * mdim];
        }
    }
    free(index);
    free(v);
}
Exemplo n.º 4
0
/* Chow-Liu structure learning algorithm. */
SEXP chow_liu(SEXP data, SEXP nodes, SEXP estimator, SEXP whitelist,
    SEXP blacklist, SEXP conditional, SEXP debug) {

int i = 0, j = 0, k = 0, debug_coord[2], ncol = length(data);
int num = length(VECTOR_ELT(data, 0)), narcs = 0, nwl = 0, nbl = 0;
int *nlevels = NULL, clevels = 0, *est = INTEGER(estimator), *depth = NULL;
int *wl = NULL, *bl = NULL, *poset = NULL, debuglevel = isTRUE(debug);
void **columns = NULL, *cond = NULL;
short int *include = NULL;
double *mim = NULL, *means = NULL, *sse = NULL;
SEXP arcs, wlist, blist;

  /* dereference the columns of the data frame. */
  DEREFERENCE_DATA_FRAME()

  /* only TAN uses a conditional variable, so assume it's discrete and go ahead. */
  if (conditional != R_NilValue) {

    cond = (void *) INTEGER(conditional);
    clevels = NLEVELS(conditional);

  }/*THEN*/

  /* allocate the mutual information matrix and the status vector. */
  mim = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(double));
  include = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(short int));

  /* compute the pairwise mutual information coefficients. */
  if (debuglevel > 0)
    Rprintf("* computing pairwise mutual information coefficients.\n");

  mi_matrix(mim, columns, ncol, nlevels, &num, cond, &clevels, means, sse, est);

  LIST_MUTUAL_INFORMATION_COEFS();

  /* add whitelisted arcs first. */
  if ((!isNull(whitelist)) && (length(whitelist) > 0)) {

    PROTECT(wlist = arc_hash(whitelist, nodes, TRUE, TRUE));
    wl = INTEGER(wlist);
    nwl = length(wlist);

    for (i = 0; i < nwl; i++) {

      if (debuglevel > 0) {

        Rprintf("* adding whitelisted arcs first.\n");

        if (include[wl[i]] == 0) {

          Rprintf("  > arc %s - %s has been added to the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*THEN*/
        else {

          Rprintf("  > arc %s - %s was already present in the graph.\n",
            CHAR(STRING_ELT(whitelist, i)), CHAR(STRING_ELT(whitelist, i + nwl)));

        }/*ELSE*/

      }/*THEN*/

      /* update the counter if need be. */
      if (include[wl[i]] == 0)
        narcs++;
      /* include the arc in the graph. */
      include[wl[i]] = 1;

    }/*FOR*/

    UNPROTECT(1);

  }/*THEN*/

  /* cache blacklisted arcs. */
  if ((!isNull(blacklist)) && (length(blacklist) > 0)) {

    PROTECT(blist = arc_hash(blacklist, nodes, TRUE, TRUE));
    bl = INTEGER(blist);
    nbl = length(blist);

  }/*THEN*/

  /* sort the mutual information coefficients and keep track of the elements' index.  */
  poset = Calloc1D(UPTRI3_MATRIX(ncol), sizeof(int));
  for (i = 0; i < UPTRI3_MATRIX(ncol); i++)
    poset[i] = i;
  R_qsort_I(mim, poset, 1, UPTRI3_MATRIX(ncol));

  depth = Calloc1D(ncol, sizeof(int));

  for (i = UPTRI3_MATRIX(ncol) - 1; i >= 0; i--) {

    /* get back the coordinates from the position in the half-matrix. */
    INV_UPTRI3(poset[i], ncol, debug_coord);

    /* already included all the arcs we had to, exiting. */
    if (narcs >= ncol - 1)
      break;
    /* arc already present in the graph, nothing to do. */
    if (include[poset[i]] == 1)
      continue;

    if (bl) {

      if (chow_liu_blacklist(bl, &nbl, poset + i)) {

        if (debuglevel > 0) {

          Rprintf("* arc %s - %s is blacklisted, skipping.\n",
            NODE(debug_coord[0]), NODE(debug_coord[1]));

        }/*THEN*/

        continue;

      }/*THEN*/

    }/*THEN*/

    if (c_uptri3_path(include, depth, debug_coord[0], debug_coord[1], ncol,
          nodes, FALSE)) {

      if (debuglevel > 0) {

        Rprintf("* arc %s - %s introduces cycles, skipping.\n",
          NODE(debug_coord[0]), NODE(debug_coord[1]));

      }/*THEN*/

      continue;

    }/*THEN*/

    if (debuglevel > 0) {

      Rprintf("* adding arc %s - %s with mutual information %lf.\n",
        NODE(debug_coord[0]), NODE(debug_coord[1]), mim[i]);

    }/*THEN*/

    /* include the arc in the graph. */
    include[poset[i]] = 1;
    /* update the counter. */
    narcs++;

  }/*FOR*/

  if ((!isNull(blacklist)) && (length(blacklist) > 0))
    UNPROTECT(1);

  /* sanity check for blacklist-related madnes. */
  if (narcs != ncol - 1)
    error("learned %d arcs instead of %d, this is not a tree spanning all the nodes.",
      narcs, ncol - 1);

  CONVERT_TO_ARC_SET(include, 0, 2 * (ncol - 1));

  Free1D(depth);
  Free1D(mim);
  Free1D(include);
  Free1D(poset);
  Free1D(columns);
  if (nlevels)
    Free1D(nlevels);
  if (means)
    Free1D(means);
  if (sse)
    Free1D(sse);

  return arcs;

}/*CHOW_LIU*/
Exemplo n.º 5
0
/* R function  qsort(x, index.return) */
SEXP attribute_hidden do_qsort(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP x, sx;
    int indx_ret;
    double *vx = NULL;
    int *ivx = NULL;
    Rboolean x_real, x_int;

    checkArity(op, args);
    x = CAR(args);
    if (!isNumeric(x))
	error(_("argument is not a numeric vector"));
    x_real= TYPEOF(x) == REALSXP;
    x_int = !x_real && (TYPEOF(x) == INTSXP || TYPEOF(x) == LGLSXP);
    PROTECT(sx = (x_real || x_int) ? duplicate(x) : coerceVector(x, REALSXP));
    SET_ATTRIB(sx, R_NilValue);
    SET_OBJECT(sx, 0);
    indx_ret = asLogical(CADR(args));
    R_xlen_t n = XLENGTH(x);
#ifdef LONG_VECTOR_SUPPORT
    Rboolean isLong = n > INT_MAX;
#endif
    if(x_int) ivx = INTEGER(sx); else vx = REAL(sx);
    if(indx_ret) {
	SEXP ans, ansnames, indx;
	/* answer will have x = sorted x , ix = index :*/
	PROTECT(ans = allocVector(VECSXP, 2));
	PROTECT(ansnames = allocVector(STRSXP, 2));
#ifdef LONG_VECTOR_SUPPORT
	if (isLong) {
	    PROTECT(indx = allocVector(REALSXP, n));
	    double *ix = REAL(indx);
	    for(R_xlen_t i = 0; i < n; i++) ix[i] = (double) (i+1);
	    if(x_int) R_qsort_int_R(ivx, ix, 1, n);
	    else R_qsort_R(vx, ix, 1, n);
	} else
#endif
	{
	    PROTECT(indx = allocVector(INTSXP, n));
	    int *ix = INTEGER(indx);
	    int nn = (int) n;
	    for(int i = 0; i < nn; i++) ix[i] = i+1;
	    if(x_int) R_qsort_int_I(ivx, ix, 1, nn);
	    else R_qsort_I(vx, ix, 1, nn);
	}

	SET_VECTOR_ELT(ans, 0, sx);
	SET_VECTOR_ELT(ans, 1, indx);
	SET_STRING_ELT(ansnames, 0, mkChar("x"));
	SET_STRING_ELT(ansnames, 1, mkChar("ix"));
	setAttrib(ans, R_NamesSymbol, ansnames);
	UNPROTECT(4);
	return ans;
    } else {
	if(x_int)
	    R_qsort_int(ivx, 1, n);
	else
	    R_qsort(vx, 1, n);

	UNPROTECT(1);
	return sx;
    }
}
Exemplo n.º 6
0
/* These are exposed in Utils.h and are misguidely in the API */
void F77_SUB(qsort4)(double *v, int *indx, int *ii, int *jj)
{
    R_qsort_I(v, indx, *ii, *jj);
}
Exemplo n.º 7
0
void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample,
        int ndstart, int ndend, int *msplit, double *decsplit,
        double *ubest, int *ndendl, int *jstat, int mtry,
        double sumnode, int nodecnt, int *cat) {
    int last, ncat[32], icat[32], lc, nl, nr, npopl, npopr;
    int i, j, kv, l;
    static int *mind, *ncase;
    static double *xt, *ut, *v, *yl;
    double sumcat[32], avcat[32], tavcat[32], ubestt;
    double crit, critmax, critvar, suml, sumr, d, critParent;
    
    
    if (in_findBestSplit==-99){
      free(ncase);
      free(mind); //had to remove this so that it wont crash for when mdim=0, strangely happened for replace=0
      free(v);
      free(yl);
      free(xt);
      free(ut);
     //	PRINTF("giving up mem in findBestSplit\n");
      return;
    }			
    
    if (in_findBestSplit==0){
    	in_findBestSplit=1;
		ut = (double *) calloc(nsample, sizeof(double));
		xt = (double *) calloc(nsample, sizeof(double));
		v  = (double *) calloc(nsample, sizeof(double));
		yl = (double *) calloc(nsample, sizeof(double));
		mind  = (int *) calloc(mdim+1, sizeof(int));   //seems that the sometimes i am asking for kv[10] and that causes problesmms
													   //so allocate 1 more. helps with not crashing in windows
		ncase = (int *) calloc(nsample, sizeof(int));
    }
    zeroDouble(ut, nsample);
    zeroDouble(xt, nsample);
    zeroDouble(v, nsample);
    zeroDouble(yl, nsample);
    zeroInt(mind, mdim);
    zeroInt(ncase, nsample);
    
    zeroDouble(avcat, 32);
    zeroDouble(tavcat, 32);
    
    /* START BIG LOOP */
    *msplit = -1;
    *decsplit = 0.0;
    critmax = 0.0;
    ubestt = 0.0;
    for (i=0; i < mdim; ++i) mind[i] = i;
    
    last = mdim - 1;
    for (i = 0; i < mtry; ++i) {
        critvar = 0.0;
        j = (int) (unif_rand() * (last+1));
        //PRINTF("j=%d, last=%d mind[j]=%d\n", j, last, mind[j]);fflush(stdout);
        kv = mind[j];
		//if(kv>100){
		//	1;
		//	getchar();
		//}
        swapInt(mind[j], mind[last]);
        /* mind[j] = mind[last];
         * mind[last] = kv; */
        last--;
        
        lc = cat[kv];
        if (lc == 1) {
            /* numeric variable */
            for (j = ndstart; j <= ndend; ++j) {
                xt[j] = x[kv + (jdex[j] - 1) * mdim];
                yl[j] = y[jdex[j] - 1];
            }
        } else {
            /* categorical variable */
            zeroInt(ncat, 32);
            zeroDouble(sumcat, 32);
            for (j = ndstart; j <= ndend; ++j) {
                l = (int) x[kv + (jdex[j] - 1) * mdim];
                sumcat[l - 1] += y[jdex[j] - 1];
                ncat[l - 1] ++;
            }
            /* Compute means of Y by category. */
            for (j = 0; j < lc; ++j) {
                avcat[j] = ncat[j] ? sumcat[j] / ncat[j] : 0.0;
            }
            /* Make the category mean the `pseudo' X data. */
            for (j = 0; j < nsample; ++j) {
                xt[j] = avcat[(int) x[kv + (jdex[j] - 1) * mdim] - 1];
                yl[j] = y[jdex[j] - 1];
            }
        }
        /* copy the x data in this node. */
        for (j = ndstart; j <= ndend; ++j) v[j] = xt[j];
        for (j = 1; j <= nsample; ++j) ncase[j - 1] = j;
        R_qsort_I(v, ncase, ndstart + 1, ndend + 1);
        if (v[ndstart] >= v[ndend]) continue;
        /* ncase(n)=case number of v nth from bottom */
        /* Start from the right and search to the left. */
        critParent = sumnode * sumnode / nodecnt;
        suml = 0.0;
        sumr = sumnode;
        npopl = 0;
        npopr = nodecnt;
        crit = 0.0;
        /* Search through the "gaps" in the x-variable. */
        for (j = ndstart; j <= ndend - 1; ++j) {
            d = yl[ncase[j] - 1];
            suml += d;
            sumr -= d;
            npopl++;
            npopr--;
            if (v[j] < v[j+1]) {
                crit = (suml * suml / npopl) + (sumr * sumr / npopr) -
                        critParent;
                if (crit > critvar) {
                    ubestt = (v[j] + v[j+1]) / 2.0;
                    critvar = crit;
                }
            }
        }
        if (critvar > critmax) {
            *ubest = ubestt;
            *msplit = kv + 1;
            critmax = critvar;
            for (j = ndstart; j <= ndend; ++j) {
                ut[j] = xt[j];
            }
            if (cat[kv] > 1) {
                for (j = 0; j < cat[kv]; ++j) tavcat[j] = avcat[j];
            }
        }
    }
    *decsplit = critmax;
    
    /* If best split can not be found, set to terminal node and return. */
    if (*msplit != -1) {
        nl = ndstart;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] <= *ubest) {
                nl++;
                ncase[nl-1] = jdex[j];
            }
        }
        *ndendl = imax2(nl - 1, ndstart);
        nr = *ndendl + 1;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] > *ubest) {
                if (nr >= nsample) break;
                nr++;
                ncase[nr - 1] = jdex[j];
            }
        }
        if (*ndendl >= ndend) *ndendl = ndend - 1;
        for (j = ndstart; j <= ndend; ++j) jdex[j] = ncase[j];
        
        lc = cat[*msplit - 1];
        if (lc > 1) {
            for (j = 0; j < lc; ++j) {
                icat[j] = (tavcat[j] < *ubest) ? 1 : 0;
            }
            *ubest = pack(lc, icat);
        }
    } else *jstat = 1;
    
}