Example #1
0
File: sim.c Project: cran/VLMC
SEXP vlmc_sim(SEXP vlmc_R, SEXP nsim_)
{
/* Given a fitted VLMC (with |alphabet| = m), simulate a new series y[1..N],
   where N = data_len */

    int N = asInteger(nsim_), next_ind = 0, nprot = 1;
    if (!isInteger(vlmc_R)) {
	vlmc_R = PROTECT(coerceVector(vlmc_R, INTSXP)); nprot++;
    }

    SEXP res = PROTECT(allocVector(INTSXP, N)); // the result
    int *y = INTEGER(res),
	m = INTEGER(vlmc_R)[0]; // =  |alphabet|

    node_t *top = load_tree(INTEGER(vlmc_R), &next_ind, LENGTH(vlmc_R),
		    /*level*/ 0, /*Debug*/ 0);
    GetRNGstate();

    for (int i = 0; i < N; i++) {
	/* Find the context, descending the tree, given y[i-1], y[i-2],... : */
	node_t *this, *temp;
	int j;
	for (j = 1, this = top;
	     j <= i && (temp = this->child[y[i - j]]) != NULL;
	     j++, this = temp) ;

	int count = 0;
	double r = (double) this->total * unif_rand();
	for (j = 0; j < m; j++) {
	    count += this->count[j];
	    if (r <= count) {
		y[i] = j;	break;
	    }
	}
    }
    PutRNGstate();

    free_node(top);/* do not leak ! */
    UNPROTECT(nprot);
    return res;
}
void permuteOOB(int m, double *x, int *in, int nsample, int mdim) {
/* Permute the OOB part of a variable in x. 
 * Argument:
 *   m: the variable to be permuted
 *   x: the data matrix (variables in rows)
 *   in: vector indicating which case is OOB
 *   nsample: number of cases in the data
 *   mdim: number of variables in the data
 */
    double *tp, tmp;
    int i, last, k, nOOB = 0;
    
    tp = (double *)  calloc(nsample, sizeof(double));

    for (i = 0; i < nsample; ++i) {
	/* make a copy of the OOB part of the data into tp (for permuting) */
	if (in[i] == 0) {
            tp[nOOB] = x[m + i*mdim];
            nOOB++;
        }
    }
    /* Permute tp */
    last = nOOB;
    for (i = 0; i < nOOB; ++i) {
	k = (int) (last * unif_rand());
	tmp = tp[last - 1];
	tp[last - 1] = tp[k];
	tp[k] = tmp;
	last--;
    }

    /* Copy the permuted OOB data back into x. */
    nOOB = 0;
    for (i = 0; i < nsample; ++i) {
	if (in[i] == 0) {
            x[m + i*mdim] = tp[nOOB];
            nOOB++;
	}
    }
    free(tp);
}
Example #3
0
/* Written by William Constantine     */
mutil_errcode mutil_rand_uniform( void *rand_ptr, double *num_out )
{
  MUTIL_TRACE("Start mutil_rand_uniform()");

  /* avoid lint warning */
  (void) rand_ptr;

  if( !num_out ) {
    MUTIL_ERROR( "NULL pointer for output" );
    return MUTIL_ERR_NULL_POINTER;
  }

  /* do not allow function to return endpoints */

  do {
    *num_out = unif_rand();
  } while( *num_out == 0 || *num_out == 1 );

  MUTIL_TRACE("Done with mutil_rand_uniform()");
  return MUTIL_ERR_OK;
}
double rsignrank(double n)
{
    int i, k;
    double r;

#ifdef IEEE_754
    /* NaNs propagated correctly */
    if (ISNAN(n)) return(n);
#endif
    n = floor(n + 0.5);
    if (n < 0) ML_ERR_return_NAN;

    if (n == 0)
	return(0);
    r = 0.0;
    k = (int) n;
    for (i = 0; i < k; ) {
	r += (++i) * floor(unif_rand() + 0.5);
    }
    return(r);
}
Example #5
0
int *SampleWithoutReplace(int n, int k, int *result, int *buffer)
{
    if (result == NULL)
        result = (int *) R_alloc(k, sizeof(int));
    if (buffer == NULL)
        buffer = (int *) R_alloc(n, sizeof(int));

    for (int i = 0; i < n; i++)
        buffer[i] = i;

    GetRNGstate();
    for (int i = 0; i < k; i++)
    {
        int j = n * unif_rand();
        result[i] = buffer[j];
        buffer[j] = buffer[--n];
    }
    PutRNGstate();

    return result;
}
Example #6
0
File: sample.c Project: cran/eco
/* Grid method samping from tomography line*/
void rGrid(
	   double *Sample,         /* W_i sampled from each tomography line */                 
	   double *W1gi,           /* The grid lines of W1[i] */
	   double *W2gi,           /* The grid lines of W2[i] */
	   int ni_grid,            /* number of grids for observation i*/
	   double *mu,             /* mean vector for normal */ 
	   double **InvSigma,      /* Inverse covariance matrix for normal */
	   int n_dim)              /* dimension of parameters */
{
  int j;
  double dtemp;
  double *vtemp=doubleArray(n_dim);
  double *prob_grid=doubleArray(ni_grid);     /* density by grid */
  double *prob_grid_cum=doubleArray(ni_grid); /* cumulative density by grid */
    
  dtemp=0;
  for (j=0;j<ni_grid;j++){
    vtemp[0]=log(W1gi[j])-log(1-W1gi[j]);
    vtemp[1]=log(W2gi[j])-log(1-W2gi[j]);
    prob_grid[j]=dMVN(vtemp, mu, InvSigma, n_dim, 1) -
      log(W1gi[j])-log(W2gi[j])-log(1-W1gi[j])-log(1-W2gi[j]);
    prob_grid[j]=exp(prob_grid[j]);
    dtemp+=prob_grid[j];
    prob_grid_cum[j]=dtemp;
  }
  for (j=0;j<ni_grid;j++)
    prob_grid_cum[j]/=dtemp; /*standardize prob.grid */

  /*2 sample W_i on the ith tomo line */
  j=0;
  dtemp=unif_rand();
  while (dtemp > prob_grid_cum[j]) j++;
  Sample[0]=W1gi[j];
  Sample[1]=W2gi[j];

  free(vtemp);
  free(prob_grid);
  free(prob_grid_cum);

}
Example #7
0
void TestSetError(double *countts, int *jts, int *clts, int *jet, int ntest,
        int nclass, int nvote, double *errts,
        int labelts, int *nclts, double *cutoff) {
    int j, n, ntie;
    double cmax, crit;
    
    for (n = 0; n < ntest; ++n) countts[jts[n]-1 + n*nclass] += 1.0;
    
    /*  Prediction is the class with the maximum votes */
    for (n = 0; n < ntest; ++n) {
        cmax=0.0;
        ntie = 1;
        for (j = 0; j < nclass; ++j) {
            crit = (countts[j + n*nclass] / nvote) / cutoff[j];
            if (crit > cmax) {
                jet[n] = j+1;
                cmax = crit;
            }
            /*  Break ties at random: */
            if (crit == cmax) {
                ntie++;
                if (unif_rand() > 1.0 / ntie) {
                    jet[n] = j+1;
                    cmax = crit;
                }
            }
        }
    }
    if (labelts) {
        zeroDouble(errts, nclass + 1);
        for (n = 0; n < ntest; ++n) {
            if (jet[n] != clts[n]) {
                errts[0] += 1.0;
                errts[clts[n]] += 1.0;
            }
        }
        errts[0] /= ntest;
        for (n = 1; n <= nclass; ++n) errts[n] /= nclts[n-1];
    }
}
Example #8
0
File: sample.c Project: cran/eco
/* sample W via MH for 2x2 table */
void rMH(
	 double *W,              /* previous draws */
	 double *XY,             /* X_i and Y_i */
	 double W1min,           /* lower bound for W1 */
	 double W1max,           /* upper bound for W1 */
	 double *mu,            /* mean vector for normal */ 
	 double **InvSigma,     /* Inverse covariance matrix for normal */
	 int n_dim)              /* dimension of parameters */
{
  int j;
  double dens1, dens2, ratio;
  double *Sample = doubleArray(n_dim);
  double *vtemp = doubleArray(n_dim);
  double *vtemp1 = doubleArray(n_dim);
  
  /* sample W_1 from unif(W1min, W1max) */
  Sample[0] = runif(W1min, W1max);
  Sample[1] = XY[1]/(1-XY[0])-Sample[0]*XY[0]/(1-XY[0]);
  for (j = 0; j < n_dim; j++) {
    vtemp[j] = log(Sample[j])-log(1-Sample[j]);
    vtemp1[j] = log(W[j])-log(1-W[j]);
  }
  /* acceptance ratio */
  dens1 = dMVN(vtemp, mu, InvSigma, n_dim, 1) -
    log(Sample[0])-log(Sample[1])-log(1-Sample[0])-log(1-Sample[1]);
  dens2 = dMVN(vtemp1, mu, InvSigma, n_dim, 1) -
    log(W[0])-log(W[1])-log(1-W[0])-log(1-W[1]);
  ratio = fmin2(1, exp(dens1-dens2));
  
  /* accept */
  if (unif_rand() < ratio) 
    for (j=0; j<n_dim; j++) 
      W[j]=Sample[j];
  
  free(Sample);
  free(vtemp);
  free(vtemp1);
}
Example #9
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// copula for data with missing values 
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void copula_NA( double Z[], double K[], int R[], int not_continuous[], int *n, int *p )
{
    int number = *n, dim = *p, nxp = number * dim, dimp1 = dim + 1;
    
    #pragma omp parallel
    {	
        double sigma, sd_j, mu_ij, lb, ub, runif_value, pnorm_lb, pnorm_ub;
        int i, j;
        
        #pragma omp for
        for( int counter = 0; counter < nxp; counter++ )
        {   
            j = counter / number;
            i = counter % number;
            
            if( not_continuous[ j ] )
            {
                sigma = 1.0 / K[ j * dimp1 ]; // 1.0 / K[ j * dim + j ];
                sd_j  = sqrt( sigma );
                
                get_mean( Z, K, &mu_ij, &sigma, &i, &j, &number, &dim );
                
                if( R[ counter ] != 0 )
                {
                    get_bounds_NA( Z, R, &lb, &ub, &i, &j, &number );
                    
                    pnorm_lb     = Rf_pnorm5( lb, mu_ij, sd_j, TRUE, FALSE );
                    pnorm_ub     = Rf_pnorm5( ub, mu_ij, sd_j, TRUE, FALSE );
                    //runif_value = runif( pnorm_lb, pnorm_ub );
                    runif_value  = pnorm_lb + unif_rand() * ( pnorm_ub - pnorm_lb );
                    Z[ counter ] = Rf_qnorm5( runif_value, mu_ij, sd_j, TRUE, FALSE );
                }else
                    Z[ counter ] = mu_ij + norm_rand() * sd_j;  // rnorm( mu_ij, sd_j );
            }
        }
    }
}
Example #10
0
void R_max_col(double *matrix, int *nr, int *nc, int *maxes)
{
    int	  r, c, m, ntie, n_r = *nr;
    double a, b, tol, large;
    Rboolean isna, used_random=FALSE;

    for (r = 0; r < n_r; r++) {
	/* first check row for any NAs and find the largest entry */
	large = 0.0;
	isna = FALSE;
	for (c = 0; c < *nc; c++) {
	    a = matrix[r + c * n_r];
	    if (ISNAN(a)) { isna = TRUE; break; }
	    large = fmax2(large, fabs(a));
	}
	if (isna) { maxes[r] = NA_INTEGER; continue; }
	tol = RELTOL * large;

	m = 0;
	ntie = 1;
	a = matrix[r];
	for (c = 1; c < *nc; c++) {
	    b = matrix[r + c * n_r];
	    if (b >= a + tol) {
		ntie = 1;
		a = b;
		m = c;
	    } else if (b >= a - tol) {
		ntie++;    
		if (!used_random) { GetRNGstate(); used_random = TRUE; }
		if (ntie * unif_rand() < 1.) m = c;
	    }
	}
	maxes[r] = m + 1;
    }
    if(used_random) PutRNGstate();
}
Example #11
0
int sampleTransaction(int T, int *weights) {

	int randomWeight;
	int imax = T - 1;
	int imin = 0;
	int imid;

	/* We select a weight value randomly */

	randomWeight = ((int) (unif_rand() * (weights[ T - 1 ]))) + 1;

	/* We identify the first transaction whose cumulative
	 * weight is larger or equal to randomWeight */

	/* We look for the element using binary search */

	while (imax >= imin) {

		imid = imin + ((imax - imin) / 2);

		if (weights[ imid ] < randomWeight)
			imin = imid + 1;
		else if (weights[ imid ] > randomWeight)
			imax = imid - 1;
		else {
			while (imid > 0 && weights[ imid - 1 ] == randomWeight)
				imid--;

			return imid;
		}
	}

	if (weights[ imid ] > randomWeight)
		return imid;

	return imid + 1;
}
Example #12
0
double r_random_walk(double alpha, double tau, double beta, double delta)
{
  double dt=0.0001;
  double t,sigma=1;
  double p = .5 * (1+((delta*sqrt(dt))/sigma));
  double a;
  //double q = .5 * (1-((mu*sqrt(dt))/sigma));
  int i = 0;
  double y = beta*alpha;

  while(y < alpha && y > 0)
  {
    GetRNGstate();
    a = unif_rand();
    PutRNGstate();
    if(a <= p) y = y + sigma*sqrt(dt);
    else y = y - sigma*sqrt(dt);
    i++;
  }
  if(y >= alpha) t = (i*dt+tau);
  else t = -(i*dt+tau);

  return t;
}
Example #13
0
/**
 * Generate the direction for "running Shake-and-Bake" according to 1.3.3 of
 * Boender et al. (1991)
 */
void hitandrun_rsabDir(double *d, Matrix *constr, int index) {
	const int inc1 = 1; // for BLAS

	int n = constr->nCol - 1;

	double c[n]; // the constraint vector
	for (int i = 0; i < n; ++i) {
		c[i] = *get(constr, index, i);
	}

  if (n == 1) {
    d[0] = -c[0];
    return;
  }

	double r = root(unif_rand(), n - 1);
	hitandrun_randDir(d, n); // \~{u} in the paper

	double cd = F77_CALL(ddot)(&n, c, &inc1, d, &inc1);
	double fd = r / sqrt(1 - cd * cd);
	double fc = -(r * cd / sqrt(1 - cd * cd) + sqrt(1 - r * r));
	F77_CALL(dscal)(&n, &fd, d, &inc1); // d := fd * d
	F77_CALL(daxpy)(&n, &fc, c, &inc1, d, &inc1); // d := fc * c + d
}
Example #14
0
void rrand_(double *r) { *r = unif_rand(); }
Example #15
0
void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat,
        int *sampsize, int *strata, int *Options, int *ntree, int *nvar,
        int *ipi, double *classwt, double *cut, int *nodesize,
        int *outcl, int *counttr, double *prox,
        double *imprt, double *impsd, double *impmat, int *nrnodes,
        int *ndbigtree, int *nodestatus, int *bestvar, int *treemap,
        int *nodeclass, double *xbestsplit, double *errtr,
        int *testdat, double *xts, int *clts, int *nts, double *countts,
        int *outclts, int labelts, double *proxts, double *errts,
        int *inbag) {
    /******************************************************************
     *  C wrapper for random forests:  get input from R and drive
     *  the Fortran routines.
     *
     *  Input:
     *
     *  x:        matrix of predictors (transposed!)
     *  dimx:     two integers: number of variables and number of cases
     *  cl:       class labels of the data
     *  ncl:      number of classes in the responsema
     *  cat:      integer vector of number of classes in the predictor;
     *            1=continuous
     * maxcat:    maximum of cat
     * Options:   7 integers: (0=no, 1=yes)
     *     add a second class (for unsupervised RF)?
     *         1: sampling from product of marginals
     *         2: sampling from product of uniforms
     *     assess variable importance?
     *     calculate proximity?
     *     calculate proximity based on OOB predictions?
     *     calculate outlying measure?
     *     how often to print output?
     *     keep the forest for future prediction?
     *  ntree:    number of trees
     *  nvar:     number of predictors to use for each split
     *  ipi:      0=use class proportion as prob.; 1=use supplied priors
     *  pi:       double vector of class priors
     *  nodesize: minimum node size: no node with fewer than ndsize
     *            cases will be split
     *
     *  Output:
     *
     *  outcl:    class predicted by RF
     *  counttr:  matrix of votes (transposed!)
     *  imprt:    matrix of variable importance measures
     *  impmat:   matrix of local variable importance measures
     *  prox:     matrix of proximity (if iprox=1)
     ******************************************************************/
    
    int nsample0, mdim, nclass, addClass, mtry, ntest, nsample, ndsize,
            mimp, nimp, near, nuse, noutall, nrightall, nrightimpall,
            keepInbag, nstrata;
    int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox,
            oobprox, keepf, replace, stratify, trace, *nright,
            *nrightimp, *nout, *nclts, Ntree;
    
    int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex,
            *nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed,
            *jtr, *classFreq, *idmove, *jvr,
            *at, *a, *b, *mind, *nind, *jts, *oobpair;
    int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry;
    
    double av=0.0;
    
    double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win,
            *tp, *wr;
    
    //Do initialization for COKUS's Random generator
    seedMT(2*rand()+1);  //works well with odd number so why don't use that
    
    addClass = Options[0];
    imp      = Options[1];
    localImp = Options[2];
    iprox    = Options[3];
    oobprox  = Options[4];
    trace    = Options[5];
    keepf    = Options[6];
    replace  = Options[7];
    stratify = Options[8];
    keepInbag = Options[9];
    mdim     = dimx[0];
    nsample0 = dimx[1];
    nclass   = (*ncl==1) ? 2 : *ncl;
    ndsize   = *nodesize;
    Ntree    = *ntree;
    mtry     = *nvar;
    ntest    = *nts;
    nsample = addClass ? (nsample0 + nsample0) : nsample0;
    mimp = imp ? mdim : 1;
    nimp = imp ? nsample : 1;
    near = iprox ? nsample0 : 1;
    if (trace == 0) trace = Ntree + 1;
    
    /*printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest);
    printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp);
    printf("\nstratify %d, replace %d",stratify,replace);
    printf("\n");*/
    tgini =      (double *) S_alloc_alt(mdim, sizeof(double));
    wl =         (double *) S_alloc_alt(nclass, sizeof(double));
    wr =         (double *) S_alloc_alt(nclass, sizeof(double));
    classpop =   (double *) S_alloc_alt(nclass* *nrnodes, sizeof(double));
    tclasscat =  (double *) S_alloc_alt(nclass*32, sizeof(double));
    tclasspop =  (double *) S_alloc_alt(nclass, sizeof(double));
    tx =         (double *) S_alloc_alt(nsample, sizeof(double));
    win =        (double *) S_alloc_alt(nsample, sizeof(double));
    tp =         (double *) S_alloc_alt(nsample, sizeof(double));
    
    out =           (int *) S_alloc_alt(nsample, sizeof(int));
    bestsplitnext = (int *) S_alloc_alt(*nrnodes, sizeof(int));
    bestsplit =     (int *) S_alloc_alt(*nrnodes, sizeof(int));
    nodepop =       (int *) S_alloc_alt(*nrnodes, sizeof(int));
    nodestart =     (int *) S_alloc_alt(*nrnodes, sizeof(int));
    jin =           (int *) S_alloc_alt(nsample, sizeof(int));
    nodex =         (int *) S_alloc_alt(nsample, sizeof(int));
    nodexts =       (int *) S_alloc_alt(ntest, sizeof(int));
    ta =            (int *) S_alloc_alt(nsample, sizeof(int));
    ncase =         (int *) S_alloc_alt(nsample, sizeof(int));
    jerr =          (int *) S_alloc_alt(nsample, sizeof(int));
    varUsed =       (int *) S_alloc_alt(mdim, sizeof(int));
    jtr =           (int *) S_alloc_alt(nsample, sizeof(int));
    jvr =           (int *) S_alloc_alt(nsample, sizeof(int));
    classFreq =     (int *) S_alloc_alt(nclass, sizeof(int));
    jts =           (int *) S_alloc_alt(ntest, sizeof(int));
    idmove =        (int *) S_alloc_alt(nsample, sizeof(int));
    at =            (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    a =             (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    b =             (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    mind =          (int *) S_alloc_alt(mdim, sizeof(int));
    nright =        (int *) S_alloc_alt(nclass, sizeof(int));
    nrightimp =     (int *) S_alloc_alt(nclass, sizeof(int));
    nout =          (int *) S_alloc_alt(nclass, sizeof(int));
    if (oobprox) {
        oobpair = (int *) S_alloc_alt(near*near, sizeof(int));
    }
    //printf("nsample=%d\n", nsample);
    /* Count number of cases in each class. */
    zeroInt(classFreq, nclass);
    for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++;
    /* Normalize class weights. */
    //Rprintf("ipi %d ",*ipi);
    //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]);
    normClassWt(cl, nsample, nclass, *ipi, classwt, classFreq);
    //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]);
   
    if (stratify) {
        /* Count number of strata and frequency of each stratum. */
        nstrata = 0;
        for (n = 0; n < nsample0; ++n)
            if (strata[n] > nstrata) nstrata = strata[n];
        /* Create the array of pointers, each pointing to a vector
         * of indices of where data of each stratum is. */
        strata_size = (int  *) S_alloc_alt(nstrata, sizeof(int));
        for (n = 0; n < nsample0; ++n) {
            strata_size[strata[n] - 1] ++;
        }
        strata_idx =  (int **) S_alloc_alt(nstrata, sizeof(int *));
        for (n = 0; n < nstrata; ++n) {
            strata_idx[n] = (int *) S_alloc_alt(strata_size[n], sizeof(int));
        }
        zeroInt(strata_size, nstrata);
        for (n = 0; n < nsample0; ++n) {
            strata_size[strata[n] - 1] ++;
            strata_idx[strata[n] - 1][strata_size[strata[n] - 1] - 1] = n;
        }
    } else {
        nind = replace ? NULL : (int *) S_alloc_alt(nsample, sizeof(int));
    }
    
    /*    INITIALIZE FOR RUN */
    if (*testdat) zeroDouble(countts, ntest * nclass);
    zeroInt(counttr, nclass * nsample);
    zeroInt(out, nsample);
    zeroDouble(tgini, mdim);
    zeroDouble(errtr, (nclass + 1) * Ntree);
    
    if (labelts) {
        nclts  = (int *) S_alloc_alt(nclass, sizeof(int));
        for (n = 0; n < ntest; ++n) nclts[clts[n]-1]++;
        zeroDouble(errts, (nclass + 1) * Ntree);
    }
    //printf("labelts %d\n",labelts);fflush(stdout);
    if (imp) {
        zeroDouble(imprt, (nclass+2) * mdim);
        zeroDouble(impsd, (nclass+1) * mdim);
        if (localImp) zeroDouble(impmat, nsample * mdim);
    }
    if (iprox) {
        zeroDouble(prox, nsample0 * nsample0);
        if (*testdat) zeroDouble(proxts, ntest * (ntest + nsample0));
    }
    makeA(x, mdim, nsample, cat, at, b);
    
    //R_CheckUserInterrupt();
    
    
    /* Starting the main loop over number of trees. */
    GetRNGstate();
    if (trace <= Ntree) {
        /* Print header for running output. */
        Rprintf("ntree      OOB");
        for (n = 1; n <= nclass; ++n) Rprintf("%7i", n);
        if (labelts) {
            Rprintf("|    Test");
            for (n = 1; n <= nclass; ++n) Rprintf("%7i", n);
        }
        Rprintf("\n");
    }
    idxByNnode = 0;
    idxByNsample = 0;
    
    //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]);
    for(jb = 0; jb < Ntree; jb++) {
		//Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]);
        //printf("jb=%d,\n",jb);
        /* Do we need to simulate data for the second class? */
        if (addClass) createClass(x, nsample0, nsample, mdim);
        do {
            zeroInt(nodestatus + idxByNnode, *nrnodes);
            zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes);
            zeroDouble(xbestsplit + idxByNnode, *nrnodes);
            zeroInt(nodeclass + idxByNnode, *nrnodes);
            zeroInt(varUsed, mdim);
            /* TODO: Put all sampling code into a function. */
            /* drawSample(sampsize, nsample, ); */
            if (stratify) {  /* stratified sampling */
                zeroInt(jin, nsample);
                zeroDouble(tclasspop, nclass);
                zeroDouble(win, nsample);
                if (replace) {  /* with replacement */
                    for (n = 0; n < nstrata; ++n) {
                        for (j = 0; j < sampsize[n]; ++j) {
                            ktmp = (int) (unif_rand() * strata_size[n]);
                            k = strata_idx[n][ktmp];
                            tclasspop[cl[k] - 1] += classwt[cl[k] - 1];
                            win[k] += classwt[cl[k] - 1];
                            jin[k] = 1;
                        }
                    }
                } else { /* stratified sampling w/o replacement */
                    /* re-initialize the index array */
                    zeroInt(strata_size, nstrata);
                    for (j = 0; j < nsample; ++j) {
                        strata_size[strata[j] - 1] ++;
                        strata_idx[strata[j] - 1][strata_size[strata[j] - 1] - 1] = j;
                    }
                    /* sampling without replacement */
                    for (n = 0; n < nstrata; ++n) {
                        last = strata_size[n] - 1;
                        for (j = 0; j < sampsize[n]; ++j) {
                            ktmp = (int) (unif_rand() * (last+1));
                            k = strata_idx[n][ktmp];
                            swapInt(strata_idx[n][last], strata_idx[n][ktmp]);
                            last--;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    }
                }
            } else {  /* unstratified sampling */
                anyEmpty = 0;
                ntry = 0;
                do {
                    zeroInt(jin, nsample);
                    zeroDouble(tclasspop, nclass);
                    zeroDouble(win, nsample);
                    if (replace) {
                        for (n = 0; n < *sampsize; ++n) {
                            k = unif_rand() * nsample;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    } else {
                        for (n = 0; n < nsample; ++n) nind[n] = n;
                        last = nsample - 1;
                        for (n = 0; n < *sampsize; ++n) {
                            ktmp = (int) (unif_rand() * (last+1));
                            k = nind[ktmp];
                            swapInt(nind[ktmp], nind[last]);
                            last--;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    }
                    /* check if any class is missing in the sample */
                    for (n = 0; n < nclass; ++n) {
                        if (tclasspop[n] == 0) anyEmpty = 1;
                    }
                    ntry++;
                } while (anyEmpty && ntry <= 10);
            }
            
            /* If need to keep indices of inbag data, do that here. */
            if (keepInbag) {
                for (n = 0; n < nsample0; ++n) {
                    inbag[n + idxByNsample] = jin[n];
                }
            }
            
            /* Copy the original a matrix back. */
            memcpy(a, at, sizeof(int) * mdim * nsample);
            modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin);
            
            #ifdef WIN64
            F77_CALL(_buildtree)
            #endif
                    
            #ifndef WIN64
            F77_CALL(buildtree)
            #endif        
            (a, b, cl, cat, maxcat, &mdim, &nsample,
                    &nclass,
                    treemap + 2*idxByNnode, bestvar + idxByNnode,
                    bestsplit, bestsplitnext, tgini,
                    nodestatus + idxByNnode, nodepop,
                    nodestart, classpop, tclasspop, tclasscat,
                    ta, nrnodes, idmove, &ndsize, ncase,
                    &mtry, varUsed, nodeclass + idxByNnode,
                    ndbigtree + jb, win, wr, wl, &mdim,
                    &nuse, mind);
            /* if the "tree" has only the root node, start over */
        } while (ndbigtree[jb] == 1);
        
        Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode,
                bestsplit, bestsplitnext, xbestsplit + idxByNnode,
                nodestatus + idxByNnode, cat, ndbigtree[jb]);
        
        /*  Get test set error */
        if (*testdat) {
            predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode,
                    nodestatus + idxByNnode, xbestsplit + idxByNnode,
                    bestvar + idxByNnode,
                    nodeclass + idxByNnode, ndbigtree[jb],
                    cat, nclass, jts, nodexts, *maxcat);
            TestSetError(countts, jts, clts, outclts, ntest, nclass, jb+1,
                    errts + jb*(nclass+1), labelts, nclts, cut);
        }
        
        /*  Get out-of-bag predictions and errors. */
        predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode,
                nodestatus + idxByNnode, xbestsplit + idxByNnode,
                bestvar + idxByNnode,
                nodeclass + idxByNnode, ndbigtree[jb],
                cat, nclass, jtr, nodex, *maxcat);
        
        zeroInt(nout, nclass);
        noutall = 0;
        for (n = 0; n < nsample; ++n) {
            if (jin[n] == 0) {
                /* increment the OOB votes */
                counttr[n*nclass + jtr[n] - 1] ++;
                /* count number of times a case is OOB */
                out[n]++;
                /* count number of OOB cases in the current iteration.
                 * nout[n] is the number of OOB cases for the n-th class.
                 * noutall is the number of OOB cases overall. */
                nout[cl[n] - 1]++;
                noutall++;
            }
        }
        
        /* Compute out-of-bag error rate. */
        oob(nsample, nclass, jin, cl, jtr, jerr, counttr, out,
                errtr + jb*(nclass+1), outcl, cut);
        
        if ((jb+1) % trace == 0) {
            Rprintf("%5i: %6.2f%%", jb+1, 100.0*errtr[jb * (nclass+1)]);
            for (n = 1; n <= nclass; ++n) {
                Rprintf("%6.2f%%", 100.0 * errtr[n + jb * (nclass+1)]);
            }
            if (labelts) {
                Rprintf("| ");
                for (n = 0; n <= nclass; ++n) {
                    Rprintf("%6.2f%%", 100.0 * errts[n + jb * (nclass+1)]);
                }
            }
            Rprintf("\n");
            
            //R_CheckUserInterrupt();
        }
        
        /*  DO VARIABLE IMPORTANCE  */
        if (imp) {
            nrightall = 0;
            /* Count the number of correct prediction by the current tree
             * among the OOB samples, by class. */
            zeroInt(nright, nclass);
            for (n = 0; n < nsample; ++n) {
                /* out-of-bag and predicted correctly: */
                if (jin[n] == 0 && jtr[n] == cl[n]) {
                    nright[cl[n] - 1]++;
                    nrightall++;
                }
            }
            for (m = 0; m < mdim; ++m) {
                if (varUsed[m]) {
                    nrightimpall = 0;
                    zeroInt(nrightimp, nclass);
                    for (n = 0; n < nsample; ++n) tx[n] = x[m + n*mdim];
                    /* Permute the m-th variable. */
                    permuteOOB(m, x, jin, nsample, mdim);
                    /* Predict the modified data using the current tree. */
                    predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode,
                            nodestatus + idxByNnode,
                            xbestsplit + idxByNnode,
                            bestvar + idxByNnode,
                            nodeclass + idxByNnode, ndbigtree[jb],
                            cat, nclass, jvr, nodex, *maxcat);
                    /* Count how often correct predictions are made with
                     * the modified data. */
                    for (n = 0; n < nsample; n++) {
                        if (jin[n] == 0) {
                            if (jvr[n] == cl[n]) {
                                nrightimp[cl[n] - 1]++;
                                nrightimpall++;
                            }
                            if (localImp && jvr[n] != jtr[n]) {
                                if (cl[n] == jvr[n]) {
                                    impmat[m + n*mdim] -= 1.0;
                                } else {
                                    impmat[m + n*mdim] += 1.0;
                                }
                            }
                        }
                        /* Restore the original data for that variable. */
                        x[m + n*mdim] = tx[n];
                    }
                    /* Accumulate decrease in proportions of correct
                     * predictions. */
                    for (n = 0; n < nclass; ++n) {
                        if (nout[n] > 0) {
                            imprt[m + n*mdim] +=
                                    ((double) (nright[n] - nrightimp[n])) /
                                    nout[n];
                            impsd[m + n*mdim] +=
                                    ((double) (nright[n] - nrightimp[n]) *
                                    (nright[n] - nrightimp[n])) / nout[n];
                        }
                    }
                    if (noutall > 0) {
                        imprt[m + nclass*mdim] +=
                                ((double)(nrightall - nrightimpall)) / noutall;
                        impsd[m + nclass*mdim] +=
                                ((double) (nrightall - nrightimpall) *
                                (nrightall - nrightimpall)) / noutall;
                    }
                }
            }
        }
        
        /*  DO PROXIMITIES */
        if (iprox) {
            computeProximity(prox, oobprox, nodex, jin, oobpair, near);
            /* proximity for test data */
            if (*testdat) {
                computeProximity(proxts, 0, nodexts, jin, oobpair, ntest);
                /* Compute proximity between testset and training set. */
                for (n = 0; n < ntest; ++n) {
                    for (k = 0; k < near; ++k) {
                        if (nodexts[n] == nodex[k])
                            proxts[n + ntest * (k+ntest)] += 1.0;
                    }
                }
            }
        }
        
        if (keepf) idxByNnode += *nrnodes;
        if (keepInbag) idxByNsample += nsample0;
    }
    PutRNGstate();
   
    
    /*  Final processing of variable importance. */
    for (m = 0; m < mdim; m++) tgini[m] /= Ntree;
      
    if (imp) {
        for (m = 0; m < mdim; ++m) {
            if (localImp) { /* casewise measures */
                for (n = 0; n < nsample; ++n) impmat[m + n*mdim] /= out[n];
            }
            /* class-specific measures */
            for (k = 0; k < nclass; ++k) {
                av = imprt[m + k*mdim] / Ntree;
                impsd[m + k*mdim] =
                        sqrt(((impsd[m + k*mdim] / Ntree) - av*av) / Ntree);
                imprt[m + k*mdim] = av;
                /* imprt[m + k*mdim] = (se <= 0.0) ? -1000.0 - av : av / se; */
            }
            /* overall measures */
            av = imprt[m + nclass*mdim] / Ntree;
            impsd[m + nclass*mdim] =
                    sqrt(((impsd[m + nclass*mdim] / Ntree) - av*av) / Ntree);
            imprt[m + nclass*mdim] = av;
            imprt[m + (nclass+1)*mdim] = tgini[m];
        }
    } else {
        for (m = 0; m < mdim; ++m) imprt[m] = tgini[m];
    }
   
    /*  PROXIMITY DATA ++++++++++++++++++++++++++++++++*/
    if (iprox) {
        for (n = 0; n < near; ++n) {
            for (k = n + 1; k < near; ++k) {
                prox[near*k + n] /= oobprox ?
                    (oobpair[near*k + n] > 0 ? oobpair[near*k + n] : 1) :
                        Ntree;
                        prox[near*n + k] = prox[near*k + n];
            }
            prox[near*n + n] = 1.0;
        }
        if (*testdat) {
            for (n = 0; n < ntest; ++n)
                for (k = 0; k < ntest + nsample; ++k)
                    proxts[ntest*k + n] /= Ntree;
        }
    }
    if (trace <= Ntree){
        printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest);
        printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp);
        printf("\nstratify %d, replace %d",stratify,replace);
        printf("\n");
    }
    
    //frees up the memory
    free(tgini);free(wl);free(wr);free(classpop);free(tclasscat);
    free(tclasspop);free(tx);free(win);free(tp);free(out);
    free(bestsplitnext);free(bestsplit);free(nodepop);free(nodestart);free(jin);
    free(nodex);free(nodexts);free(ta);free(ncase);free(jerr);
    free(varUsed);free(jtr);free(jvr);free(classFreq);free(jts);
    free(idmove);free(at);free(a);free(b);free(mind);
    free(nright);free(nrightimp);free(nout);
    
    if (oobprox) {
        free(oobpair);
    }
    
    if (stratify) {
        free(strata_size);
        for (n = 0; n < nstrata; ++n) {
            free(strata_idx[n]);
        }
        free(strata_idx);        
    } else {
        if (replace)
            free(nind);
    }
    //printf("labelts %d\n",labelts);fflush(stdout);
    
    if (labelts) {
        free(nclts);        
    }
    //printf("stratify %d",stratify);fflush(stdout);
}
Example #16
0
File: snorm.c Project: csilles/cxxr
/*
 *  REFERENCE
 *
 *    Ahrens, J.H. and Dieter, U.
 *    Extensions of Forsythe's method for random sampling from
 *    the normal distribution.
 *    Math. Comput. 27, 927-937.
 *
 *    The definitions of the constants a[k], d[k], t[k] and
 *    h[k] are according to the abovementioned article
 */
double norm_rand(void)
{

    const static double a[32] =
    {
	0.0000000, 0.03917609, 0.07841241, 0.1177699,
	0.1573107, 0.19709910, 0.23720210, 0.2776904,
	0.3186394, 0.36012990, 0.40225010, 0.4450965,
	0.4887764, 0.53340970, 0.57913220, 0.6260990,
	0.6744898, 0.72451440, 0.77642180, 0.8305109,
	0.8871466, 0.94678180, 1.00999000, 1.0775160,
	1.1503490, 1.22985900, 1.31801100, 1.4177970,
	1.5341210, 1.67594000, 1.86273200, 2.1538750
    };

    const static double d[31] =
    {
	0.0000000, 0.0000000, 0.0000000, 0.0000000,
	0.0000000, 0.2636843, 0.2425085, 0.2255674,
	0.2116342, 0.1999243, 0.1899108, 0.1812252,
	0.1736014, 0.1668419, 0.1607967, 0.1553497,
	0.1504094, 0.1459026, 0.1417700, 0.1379632,
	0.1344418, 0.1311722, 0.1281260, 0.1252791,
	0.1226109, 0.1201036, 0.1177417, 0.1155119,
	0.1134023, 0.1114027, 0.1095039
    };

    const static double t[31] =
    {
	7.673828e-4, 0.002306870, 0.003860618, 0.005438454,
	0.007050699, 0.008708396, 0.010423570, 0.012209530,
	0.014081250, 0.016055790, 0.018152900, 0.020395730,
	0.022811770, 0.025434070, 0.028302960, 0.031468220,
	0.034992330, 0.038954830, 0.043458780, 0.048640350,
	0.054683340, 0.061842220, 0.070479830, 0.081131950,
	0.094624440, 0.112300100, 0.136498000, 0.171688600,
	0.227624100, 0.330498000, 0.584703100
    };

    const static double h[31] =
    {
	0.03920617, 0.03932705, 0.03950999, 0.03975703,
	0.04007093, 0.04045533, 0.04091481, 0.04145507,
	0.04208311, 0.04280748, 0.04363863, 0.04458932,
	0.04567523, 0.04691571, 0.04833487, 0.04996298,
	0.05183859, 0.05401138, 0.05654656, 0.05953130,
	0.06308489, 0.06737503, 0.07264544, 0.07926471,
	0.08781922, 0.09930398, 0.11555990, 0.14043440,
	0.18361420, 0.27900160, 0.70104740
    };

    /*----------- Constants and definitions for  Kinderman - Ramage --- */
    /*
     *  REFERENCE
     *
     *    Kinderman A. J. and Ramage J. G. (1976).
     *    Computer generation of normal random variables.
     *    JASA 71, 893-896.
     */

#define C1		0.398942280401433
#define C2		0.180025191068563
#define g(x)		(C1*exp(-x*x/2.0)-C2*(A-x))

    const static double A =  2.216035867166471;

    double s, u1, w, y, u2, u3, aa, tt, theta, R;
    int i;

    switch(N01_kind) {

    case  AHRENS_DIETER: /* see Reference above */

	u1 = unif_rand();
	s = 0.0;
	if (u1 > 0.5)
	    s = 1.0;
	u1 = u1 + u1 - s;
	u1 *= 32.0;
	i = (int) u1;
	if (i == 32)
	    i = 31;
	if (i != 0) {
	    u2 = u1 - i;
	    aa = a[i - 1];
	    while (u2 <= t[i - 1]) {
		u1 = unif_rand();
		w = u1 * (a[i] - aa);
		tt = (w * 0.5 + aa) * w;
		repeat {
		    if (u2 > tt)
			goto deliver;
		    u1 = unif_rand();
		    if (u2 < u1)
			break;
		    tt = u1;
		    u2 = unif_rand();
		}
		u2 = unif_rand();
	    }
	    w = (u2 - t[i - 1]) * h[i - 1];
	}
	else {
Example #17
0
/**********************************************************************
 * 
 * random_int
 *   
 * Generates a random int integer between "low" and "high", inclusive.
 *
 *  Input:
 * 
 *    low
 *
 *    high
 *
 **********************************************************************/
int random_int(int low, int high)
{
  return((int)(unif_rand()*(double)(high - low + 1)) + low);
}
Example #18
0
//     rhyper(NR, NB, n) -- NR 'red', NB 'blue', n drawn, how many are 'red'
double rhyper(double nn1in, double nn2in, double kkin)
{
    /* extern double afc(int); */

    int nn1, nn2, kk;
    int ix; // return value (coerced to double at the very end)
    Rboolean setup1, setup2;

    /* These should become 'thread_local globals' : */
    static int ks = -1, n1s = -1, n2s = -1;
    static int m, minjx, maxjx;
    static int k, n1, n2; // <- not allowing larger integer par
    static double tn;

    // II :
    static double w;
    // III:
    static double a, d, s, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3;

    /* check parameter validity */

    if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin))
	ML_ERR_return_NAN;

    nn1in = R_forceint(nn1in);
    nn2in = R_forceint(nn2in);
    kkin  = R_forceint(kkin);

    if (nn1in < 0 || nn2in < 0 || kkin < 0 || kkin > nn1in + nn2in)
	ML_ERR_return_NAN;
    if (nn1in >= INT_MAX || nn2in >= INT_MAX || kkin >= INT_MAX) {
	/* large n -- evade integer overflow (and inappropriate algorithms)
	   -------- */
        // FIXME: Much faster to give rbinom() approx when appropriate; -> see Kuensch(1989)
	// Johnson, Kotz,.. p.258 (top) mention the *four* different binomial approximations
	if(kkin == 1.) { // Bernoulli
	    return rbinom(kkin, nn1in / (nn1in + nn2in));
	}
	// Slow, but safe: return  F^{-1}(U)  where F(.) = phyper(.) and  U ~ U[0,1]
	return qhyper(unif_rand(), nn1in, nn2in, kkin, FALSE, FALSE);
    }
    nn1 = (int)nn1in;
    nn2 = (int)nn2in;
    kk  = (int)kkin;

    /* if new parameter values, initialize */
    if (nn1 != n1s || nn2 != n2s) {
	setup1 = TRUE;	setup2 = TRUE;
    } else if (kk != ks) {
	setup1 = FALSE;	setup2 = TRUE;
    } else {
	setup1 = FALSE;	setup2 = FALSE;
    }
    if (setup1) {
	n1s = nn1;
	n2s = nn2;
	tn = nn1 + nn2;
	if (nn1 <= nn2) {
	    n1 = nn1;
	    n2 = nn2;
	} else {
	    n1 = nn2;
	    n2 = nn1;
	}
    }
    if (setup2) {
	ks = kk;
	if (kk + kk >= tn) {
	    k = (int)(tn - kk);
	} else {
	    k = kk;
	}
    }
    if (setup1 || setup2) {
	m = (int) ((k + 1.) * (n1 + 1.) / (tn + 2.));
	minjx = imax2(0, k - n2);
	maxjx = imin2(n1, k);
#ifdef DEBUG_rhyper
	REprintf("rhyper(nn1=%d, nn2=%d, kk=%d), setup: floor(mean)= m=%d, jx in (%d..%d)\n",
		 nn1, nn2, kk, m, minjx, maxjx);
#endif
    }
    /* generate random variate --- Three basic cases */

    if (minjx == maxjx) { /* I: degenerate distribution ---------------- */
#ifdef DEBUG_rhyper
	REprintf("rhyper(), branch I (degenerate)\n");
#endif
	ix = maxjx;
	goto L_finis; // return appropriate variate

    } else if (m - minjx < 10) { // II: (Scaled) algorithm HIN (inverse transformation) ----
	const static double scale = 1e25; // scaling factor against (early) underflow
	const static double con = 57.5646273248511421;
					  // 25*log(10) = log(scale) { <==> exp(con) == scale }
	if (setup1 || setup2) {
	    double lw; // log(w);  w = exp(lw) * scale = exp(lw + log(scale)) = exp(lw + con)
	    if (k < n2) {
		lw = afc(n2) + afc(n1 + n2 - k) - afc(n2 - k) - afc(n1 + n2);
	    } else {
		lw = afc(n1) + afc(     k     ) - afc(k - n2) - afc(n1 + n2);
	    }
	    w = exp(lw + con);
	}
	double p, u;
#ifdef DEBUG_rhyper
	REprintf("rhyper(), branch II; w = %g > 0\n", w);
#endif
      L10:
	p = w;
	ix = minjx;
	u = unif_rand() * scale;
#ifdef DEBUG_rhyper
	REprintf("  _new_ u = %g\n", u);
#endif
	while (u > p) {
	    u -= p;
	    p *= ((double) n1 - ix) * (k - ix);
	    ix++;
	    p = p / ix / (n2 - k + ix);
#ifdef DEBUG_rhyper
	    REprintf("       ix=%3d, u=%11g, p=%20.14g (u-p=%g)\n", ix, u, p, u-p);
#endif
	    if (ix > maxjx)
		goto L10;
	    // FIXME  if(p == 0.)  we also "have lost"  => goto L10
	}
    } else { /* III : H2PE Algorithm --------------------------------------- */

	double u,v;

	if (setup1 || setup2) {
	    s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn);

	    /* remark: d is defined in reference without int. */
	    /* the truncation centers the cell boundaries at 0.5 */

	    d = (int) (1.5 * s) + .5;
	    xl = m - d + .5;
	    xr = m + d + .5;
	    a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m);
	    kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl))
		     - afc((int) (k - xl))
		     - afc((int) (n2 - k + xl)));
	    kr = exp(a - afc((int) (xr - 1))
		     - afc((int) (n1 - xr + 1))
		     - afc((int) (k - xr + 1))
		     - afc((int) (n2 - k + xr - 1)));
	    lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1));
	    lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr));
	    p1 = d + d;
	    p2 = p1 + kl / lamdl;
	    p3 = p2 + kr / lamdr;
	}
#ifdef DEBUG_rhyper
	REprintf("rhyper(), branch III {accept/reject}: (xl,xr)= (%g,%g); (lamdl,lamdr)= (%g,%g)\n",
		 xl, xr, lamdl,lamdr);
	REprintf("-------- p123= c(%g,%g,%g)\n", p1,p2, p3);
#endif
	int n_uv = 0;
      L30:
	u = unif_rand() * p3;
	v = unif_rand();
	n_uv++;
	if(n_uv >= 10000) {
	    REprintf("rhyper() branch III: giving up after %d rejections", n_uv);
	    ML_ERR_return_NAN;
        }
#ifdef DEBUG_rhyper
	REprintf(" ... L30: new (u=%g, v ~ U[0,1])[%d]\n", u, n_uv);
#endif

	if (u < p1) {		/* rectangular region */
	    ix = (int) (xl + u);
	} else if (u <= p2) {	/* left tail */
	    ix = (int) (xl + log(v) / lamdl);
	    if (ix < minjx)
		goto L30;
	    v = v * (u - p1) * lamdl;
	} else {		/* right tail */
	    ix = (int) (xr - log(v) / lamdr);
	    if (ix > maxjx)
		goto L30;
	    v = v * (u - p2) * lamdr;
	}

	/* acceptance/rejection test */
	Rboolean reject = TRUE;

	if (m < 100 || ix <= 50) {
	    /* explicit evaluation */
	    /* The original algorithm (and TOMS 668) have
		   f = f * i * (n2 - k + i) / (n1 - i) / (k - i);
	       in the (m > ix) case, but the definition of the
	       recurrence relation on p134 shows that the +1 is
	       needed. */
	    int i;
	    double f = 1.0;
	    if (m < ix) {
		for (i = m + 1; i <= ix; i++)
		    f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i;
	    } else if (m > ix) {
		for (i = ix + 1; i <= m; i++)
		    f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1);
	    }
	    if (v <= f) {
		reject = FALSE;
	    }
	} else {

	    const static double deltal = 0.0078;
	    const static double deltau = 0.0034;

	    double e, g, r, t, y;
	    double de, dg, dr, ds, dt, gl, gu, nk, nm, ub;
	    double xk, xm, xn, y1, ym, yn, yk, alv;

#ifdef DEBUG_rhyper
	    REprintf(" ... accept/reject 'large' case v=%g\n", v);
#endif
	    /* squeeze using upper and lower bounds */
	    y = ix;
	    y1 = y + 1.0;
	    ym = y - m;
	    yn = n1 - y + 1.0;
	    yk = k - y + 1.0;
	    nk = n2 - k + y1;
	    r = -ym / y1;
	    s = ym / yn;
	    t = ym / yk;
	    e = -ym / nk;
	    g = yn * yk / (y1 * nk) - 1.0;
	    dg = 1.0;
	    if (g < 0.0)
		dg = 1.0 + g;
	    gu = g * (1.0 + g * (-0.5 + g / 3.0));
	    gl = gu - .25 * (g * g * g * g) / dg;
	    xm = m + 0.5;
	    xn = n1 - m + 0.5;
	    xk = k - m + 0.5;
	    nm = n2 - k + xm;
	    ub = y * gu - m * gl + deltau
		+ xm * r * (1. + r * (-0.5 + r / 3.0))
		+ xn * s * (1. + s * (-0.5 + s / 3.0))
		+ xk * t * (1. + t * (-0.5 + t / 3.0))
		+ nm * e * (1. + e * (-0.5 + e / 3.0));
	    /* test against upper bound */
	    alv = log(v);
	    if (alv > ub) {
		reject = TRUE;
	    } else {
				/* test against lower bound */
		dr = xm * (r * r * r * r);
		if (r < 0.0)
		    dr /= (1.0 + r);
		ds = xn * (s * s * s * s);
		if (s < 0.0)
		    ds /= (1.0 + s);
		dt = xk * (t * t * t * t);
		if (t < 0.0)
		    dt /= (1.0 + t);
		de = nm * (e * e * e * e);
		if (e < 0.0)
		    de /= (1.0 + e);
		if (alv < ub - 0.25 * (dr + ds + dt + de)
		    + (y + m) * (gl - gu) - deltal) {
		    reject = FALSE;
		}
		else {
		    /* * Stirling's formula to machine accuracy
		     */
		    if (alv <= (a - afc(ix) - afc(n1 - ix)
				- afc(k - ix) - afc(n2 - k + ix))) {
			reject = FALSE;
		    } else {
			reject = TRUE;
		    }
		}
	    }
	} // else
	if (reject)
	    goto L30;
    }


L_finis:
    /* return appropriate variate */

    if (kk + kk >= tn) {
	if (nn1 > nn2) {
	    ix = kk - nn2 + ix;
	} else {
	    ix = nn1 - ix;
	}
    } else {
	if (nn1 > nn2)
	    ix = kk - ix;
    }
    return ix;
}
Example #19
0
File: MCMC.c Project: Zsedo/ergm
/*********************
 void MetropolisHastings

 In this function, theta is a m->n_stats-vector just as in MCMCSample,
 but now networkstatistics is merely another m->n_stats-vector because
 this function merely iterates nsteps times through the Markov
 chain, keeping track of the cumulative change statistics along
 the way, then returns, leaving the updated change statistics in
 the networkstatistics vector.  In other words, this function 
 essentially generates a sample of size one
*********************/
MCMCStatus MetropolisHastings(MHproposal *MHp,
			      double *theta, double *networkstatistics,
			      int nsteps, int *staken,
			      int fVerbose,
			      Network *nwp,
			      Model *m) {

  unsigned int taken=0, unsuccessful=0;
/*  if (fVerbose)
    Rprintf("Now proposing %d MH steps... ", nsteps); */
  for(unsigned int step=0; step < nsteps; step++) {
    MHp->logratio = 0;
    (*(MHp->func))(MHp, nwp); /* Call MH function to propose toggles */

    if(MHp->toggletail[0]==MH_FAILED){
      if(MHp->togglehead[0]==MH_UNRECOVERABLE)
	error("Something very bad happened during proposal. Memory has not been deallocated, so restart R soon.");
      if(MHp->togglehead[0]==MH_IMPOSSIBLE){
	Rprintf("MH Proposal function encountered a configuration from which no toggle(s) can be proposed.\n");
	return MCMC_MH_FAILED;
      }
      if(MHp->togglehead[0]==MH_UNSUCCESSFUL){
	warning("MH Proposal function failed to find a valid proposal.");
	unsuccessful++;
	if(unsuccessful>taken*MH_QUIT_UNSUCCESSFUL){
	  Rprintf("Too many MH Proposal function failures.\n");
	  return MCMC_MH_FAILED;
	}       

	continue;
      }
    }
    
    if(fVerbose>=5){
      Rprintf("Proposal: ");
      for(unsigned int i=0; i<MHp->ntoggles; i++)
	Rprintf(" (%d, %d)", MHp->toggletail[i], MHp->togglehead[i]);
      Rprintf("\n");
    }

    /* Calculate change statistics,
       remembering that tail -> head */
    ChangeStats(MHp->ntoggles, MHp->toggletail, MHp->togglehead, nwp, m);

    if(fVerbose>=5){
      Rprintf("Changes: (");
      for(unsigned int i=0; i<m->n_stats; i++)
	Rprintf(" %f ", m->workspace[i]);
      Rprintf(")\n");
    }
    
    /* Calculate inner product */
    double ip=0;
    for (unsigned int i=0; i<m->n_stats; i++){
      ip += theta[i] * m->workspace[i];
    }
    /* The logic is to set cutoff = ip+logratio ,
       then let the MH probability equal min{exp(cutoff), 1.0}.
       But we'll do it in log space instead.  */
    double cutoff = ip + MHp->logratio;

    if(fVerbose>=5){
      Rprintf("log acceptance probability: %f + %f = %f\n", ip, MHp->logratio, cutoff);
    }
    
    /* if we accept the proposed network */
    if (cutoff >= 0.0 || log(unif_rand()) < cutoff) { 
      if(fVerbose>=5){
	Rprintf("Accepted.\n");
      }

      /* Make proposed toggles (updating timestamps--i.e., for real this time) */
      for(unsigned int i=0; i < MHp->ntoggles; i++){
	ToggleEdge(MHp->toggletail[i], MHp->togglehead[i], nwp);
	
	if(MHp->discord)
	  for(Network **nwd=MHp->discord; *nwd!=NULL; nwd++){
	    ToggleEdge(MHp->toggletail[i],  MHp->togglehead[i], *nwd);
	  }
      }
      /* record network statistics for posterity */
      for (unsigned int i = 0; i < m->n_stats; i++){
	networkstatistics[i] += m->workspace[i];
      }
      taken++;
    }else{
      if(fVerbose>=5){
	Rprintf("Rejected.\n");
      }
    }
  }
  
  *staken = taken;
  return MCMC_OK;
}
Example #20
0
SEXP glm_mcmc(SEXP Y, SEXP X, SEXP Roffset, SEXP Rweights, 
			  SEXP Rprobinit, SEXP Rmodeldim, 
			  SEXP modelprior, SEXP Rbestmodel,  SEXP plocal, 
			  SEXP BURNIN_Iterations,
			  SEXP Ra, SEXP Rb, SEXP Rs,
	      SEXP family, SEXP Rcontrol, SEXP Rlaplace
			  )
{
	int nProtected = 0;
	int nModels=LENGTH(Rmodeldim);
	SEXP ANS = PROTECT(allocVector(VECSXP, 17)); ++nProtected;
	SEXP ANS_names = PROTECT(allocVector(STRSXP, 17)); ++nProtected;
	SEXP Rprobs = PROTECT(duplicate(Rprobinit)); ++nProtected;
	SEXP MCMCprobs= PROTECT(duplicate(Rprobinit)); ++nProtected;
	SEXP R2 = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP shrinkage = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP modelspace = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
	SEXP modeldim =  PROTECT(duplicate(Rmodeldim)); ++nProtected;
	SEXP counts =  PROTECT(duplicate(Rmodeldim)); ++nProtected;
	SEXP beta = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
	SEXP se = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
	SEXP deviance = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP modelprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP priorprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP logmarg = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP sampleprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP Q = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	SEXP Rintercept = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
	
	SEXP NumUnique = PROTECT(allocVector(INTSXP, 1)); ++nProtected;
	
	double *probs, MH=0.0, prior_m=1.0, shrinkage_m, logmargy, postold, postnew;
	int i, m, n, pmodel_old, *bestmodel;
	int mcurrent, n_sure;
	glmstptr *glmfamily;

	glmfamily = make_glmfamily_structure(family);

	//get dimsensions of all variables 
	int p = INTEGER(getAttrib(X,R_DimSymbol))[1];
	int k = LENGTH(modelprobs);
	
	struct Var *vars = (struct Var *) R_alloc(p, sizeof(struct Var)); // Info about the model variables. 
	probs =  REAL(Rprobs);
	n = sortvars(vars, probs, p); 
	for (i =n; i <p; i++) REAL(MCMCprobs)[vars[i].index] = probs[vars[i].index];
	for (i =0; i <n; i++) REAL(MCMCprobs)[vars[i].index] = 0.0;
	
	// fill in the sure things 
	int *model = ivecalloc(p);
	for (i = n, n_sure = 0; i < p; i++)  {
		model[vars[i].index] = (int) vars[i].prob;
		if (model[vars[i].index] == 1) ++n_sure;
	}
	
	GetRNGstate();

	NODEPTR tree, branch;
	tree = make_node(-1.0);
	//  Rprintf("For m=0, Initialize Tree with initial Model\n");  

	m = 0;
	bestmodel = INTEGER(Rbestmodel);
	INTEGER(modeldim)[m] = n_sure;

	// Rprintf("Create Tree\n"); 
	branch = tree;
	CreateTree(branch, vars, bestmodel, model, n, m, modeldim);
	int pmodel = INTEGER(modeldim)[m];
	SEXP Rmodel_m =	PROTECT(allocVector(INTSXP,pmodel));
	GetModel_m(Rmodel_m, model, p);
	//evaluate logmargy and shrinkage
	SEXP glm_fit = PROTECT(glm_FitModel(X, Y, Rmodel_m, Roffset, Rweights,
					    glmfamily, Rcontrol, Ra, Rb, Rs, Rlaplace));	
	prior_m  = compute_prior_probs(model,pmodel,p, modelprior);
	
	logmargy = REAL(getListElement(getListElement(glm_fit, "lpy"),"lpY"))[0];
	shrinkage_m = REAL(getListElement(getListElement(glm_fit, "lpy"),
					"shrinkage"))[0];
	SetModel2(logmargy, shrinkage_m, prior_m, sampleprobs, logmarg, shrinkage, priorprobs, m);
	SetModel1(glm_fit, Rmodel_m, beta, se, modelspace, deviance, R2, Q,Rintercept, m);
	UNPROTECT(2);

	int nUnique=0, newmodel=0;
	double *real_model = vecalloc(n);
	int *modelold = ivecalloc(p);
	int old_loc = 0;
	int new_loc;
	pmodel_old = pmodel;
	nUnique=1;
	INTEGER(counts)[0] = 0;
	postold =  REAL(logmarg)[m] + log(REAL(priorprobs)[m]);
	memcpy(modelold, model, sizeof(int)*p);
	m = 0;
	int *varin= ivecalloc(p);
	int *varout= ivecalloc(p);
	double problocal = REAL(plocal)[0];
	while (nUnique < k && m < INTEGER(BURNIN_Iterations)[0]) {
		memcpy(model, modelold, sizeof(int)*p);
		pmodel =  n_sure;

		MH = GetNextModelCandidate(pmodel_old, n, n_sure, model, vars, problocal, varin, varout);
		
		branch = tree;
		newmodel= 0;
		for (i = 0; i< n; i++) {
			int bit =  model[vars[i].index];
			if (bit == 1) {
				if (branch->one != NULL) branch = branch->one;
				else newmodel = 1;
			} else {
				if (branch->zero != NULL)  branch = branch->zero;
				else newmodel = 1;
			} 
			pmodel  += bit;
		}

		if (pmodel  == n_sure || pmodel == n + n_sure) {
			MH = 1.0/(1.0 - problocal);
		}
		if (newmodel == 1) {
		  new_loc = nUnique;
		  PROTECT(Rmodel_m = allocVector(INTSXP,pmodel));
		  GetModel_m(Rmodel_m, model, p);
		  
		  glm_fit = PROTECT(glm_FitModel(X, Y, Rmodel_m, Roffset, Rweights,
						 glmfamily, Rcontrol, Ra, Rb, Rs, Rlaplace));	
		  prior_m = compute_prior_probs(model,pmodel,p, modelprior);
			
		  logmargy = REAL(getListElement(getListElement(glm_fit, "lpy"),"lpY"))[0];
		  shrinkage_m = REAL(getListElement(getListElement(glm_fit, "lpy"),	
						  "shrinkage"))[0];

		  postnew = logmargy + log(prior_m);
		} else {
		  new_loc = branch->where;
		  postnew =  REAL(logmarg)[new_loc] + log(REAL(priorprobs)[new_loc]);      
		} 

		MH *= exp(postnew - postold);
		//    Rprintf("MH new %lf old %lf\n", postnew, postold);
		if (unif_rand() < MH) {
			if (newmodel == 1)  {
			  new_loc = nUnique;
			  insert_model_tree(tree, vars, n, model, nUnique);
			  INTEGER(modeldim)[nUnique] = pmodel;
				//Rprintf("model %d: %d variables\n", m, pmodel);	
			  SetModel2(logmargy, shrinkage_m, prior_m, sampleprobs, logmarg, shrinkage, priorprobs, nUnique);
			  SetModel1(glm_fit, Rmodel_m, beta, se, modelspace, deviance, R2, Q, Rintercept, nUnique);
			  
			  UNPROTECT(2);	
			  ++nUnique; 
			}
			old_loc = new_loc;
			postold = postnew;
			pmodel_old = pmodel;
			memcpy(modelold, model, sizeof(int)*p);
		} else  {
			if (newmodel == 1) UNPROTECT(2);
		}
		INTEGER(counts)[old_loc] += 1;
		for (i = 0; i < n; i++) {
			// store in opposite order so nth variable is first 
			real_model[n-1-i] = (double) modelold[vars[i].index];
			REAL(MCMCprobs)[vars[i].index] += (double) modelold[vars[i].index];
		}
		m++;
	}

	for (i = 0; i < n; i++) {
		REAL(MCMCprobs)[vars[i].index] /= (double) m;
	}
	
	// Compute marginal probabilities  
	mcurrent = nUnique;
	//	Rprintf("NumUnique Models Accepted %d \n", nUnique);
	compute_modelprobs(modelprobs, logmarg, priorprobs,mcurrent);
	compute_margprobs(modelspace, modeldim, modelprobs, probs, mcurrent, p);        

	INTEGER(NumUnique)[0] = nUnique;
	SET_VECTOR_ELT(ANS, 0, Rprobs);
	SET_STRING_ELT(ANS_names, 0, mkChar("probne0"));

	if (nUnique < nModels) {
		SEXP modelspaceP = PROTECT(allocVector(VECSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			SEXP model_temp = PROTECT(VECTOR_ELT(modelspace, i));
			SET_ELEMENT(modelspaceP, i, model_temp);
			UNPROTECT(1);
		}
		SET_VECTOR_ELT(ANS, 1, modelspaceP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 1, modelspace);
	}
	SET_STRING_ELT(ANS_names, 1, mkChar("which"));

	if (nUnique < nModels) {
		SEXP logmargP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(logmargP)[i] = REAL(logmarg)[i];
		}
		SET_VECTOR_ELT(ANS, 2, logmargP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 2, logmarg);
	}
	SET_STRING_ELT(ANS_names, 2, mkChar("logmarg"));

	if (nUnique < nModels) {
		SEXP modelprobsP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(modelprobsP)[i] = REAL(modelprobs)[i];
		}
		SET_VECTOR_ELT(ANS, 3, modelprobsP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 3, modelprobs);
	}
	SET_STRING_ELT(ANS_names, 3, mkChar("postprobs"));

	if (nUnique < nModels) {
		SEXP priorprobsP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(priorprobsP)[i] = REAL(priorprobs)[i];
		}
		SET_VECTOR_ELT(ANS, 4, priorprobsP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 4, priorprobs);
	}
	SET_STRING_ELT(ANS_names, 4, mkChar("priorprobs"));

	if (nUnique < nModels) {
		SEXP sampleprobsP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(sampleprobsP)[i] = REAL(sampleprobs)[i];
		}
		SET_VECTOR_ELT(ANS, 5, sampleprobsP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 5, sampleprobs);
	}
	SET_STRING_ELT(ANS_names, 5, mkChar("sampleprobs"));

	if (nUnique < nModels) {
		SEXP devianceP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(devianceP)[i] = REAL(deviance)[i];
		}
		SET_VECTOR_ELT(ANS, 6, devianceP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 6, deviance);
	}
	SET_STRING_ELT(ANS_names, 6, mkChar("deviance"));

	if (nUnique < nModels) {
		SEXP betaP = PROTECT(allocVector(VECSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			SEXP beta_temp = PROTECT(VECTOR_ELT(beta, i));
			SET_ELEMENT(betaP, i, beta_temp);
			UNPROTECT(1);
		}
		SET_VECTOR_ELT(ANS, 7, betaP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 7, beta);
	}
	SET_STRING_ELT(ANS_names, 7, mkChar("coefficients"));

	if (nUnique < nModels) {
		SEXP seP = PROTECT(allocVector(VECSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			SEXP se_temp = PROTECT(VECTOR_ELT(se, i));
			SET_ELEMENT(seP, i, se_temp);
			UNPROTECT(1);
		}
		SET_VECTOR_ELT(ANS, 8, seP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 8, se);
	}
	SET_STRING_ELT(ANS_names, 8, mkChar("se"));

	if (nUnique < nModels) {
		SEXP shrinkageP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(shrinkageP)[i] = REAL(shrinkage)[i];
		}
		SET_VECTOR_ELT(ANS, 9, shrinkageP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 9, shrinkage);
	}
	SET_STRING_ELT(ANS_names, 9, mkChar("shrinkage"));

	if (nUnique < nModels) {
		SEXP modeldimP = PROTECT(allocVector(INTSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			INTEGER(modeldimP)[i] = INTEGER(modeldim)[i];
		}
		SET_VECTOR_ELT(ANS, 10, modeldimP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 10, modeldim);
	}
	SET_STRING_ELT(ANS_names, 10, mkChar("size"));

	if (nUnique < nModels) {
		SEXP R2P = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(R2P)[i] = REAL(R2)[i];
		}
		SET_VECTOR_ELT(ANS, 11, R2P);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 11, R2);
	}
	SET_STRING_ELT(ANS_names, 11, mkChar("R2"));

	if (nUnique < nModels) {
		SEXP countsP = PROTECT(allocVector(INTSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			INTEGER(countsP)[i] = INTEGER(counts)[i];
		}
		SET_VECTOR_ELT(ANS, 12, countsP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 12, counts);
	}
	SET_STRING_ELT(ANS_names, 12, mkChar("freq"));

	SET_VECTOR_ELT(ANS, 13, MCMCprobs);
	SET_STRING_ELT(ANS_names, 13, mkChar("probs.MCMC"));

	SET_VECTOR_ELT(ANS, 14, NumUnique);
	SET_STRING_ELT(ANS_names, 14, mkChar("n.Unique"));

	if (nUnique < nModels) {
		SEXP QP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(QP)[i] = REAL(Q)[i];
		}
		SET_VECTOR_ELT(ANS, 15, QP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 15, Q);
	}
	SET_STRING_ELT(ANS_names, 15, mkChar("Q"));

	if (nUnique < nModels) {
		SEXP RinterceptP = PROTECT(allocVector(REALSXP, nUnique));
		for (i =0; i < nUnique; i++) { 
			REAL(RinterceptP)[i] = REAL(Rintercept)[i];
		}
		SET_VECTOR_ELT(ANS, 16, RinterceptP);
		UNPROTECT(1);
	} else {
		SET_VECTOR_ELT(ANS, 16, Rintercept);
	}
	SET_STRING_ELT(ANS_names, 16, mkChar("intercept"));

	setAttrib(ANS, R_NamesSymbol, ANS_names);
	
	PutRNGstate();
    UNPROTECT(nProtected);
	//Rprintf("Return\n");
	return(ANS);  
}
Example #21
0
SEXP GillespieDirectCR(SEXP pre, SEXP post, SEXP h, SEXP M, SEXP T, SEXP delta,
		       SEXP runs, SEXP place, SEXP transition, SEXP rho)
{
  int k;

#ifdef RB_TIME
  clock_t c0, c1;
  c0 = clock();
#endif

  // Get dimensions of pre
  int *piTmp = INTEGER(getAttrib(pre, R_DimSymbol));
  int iTransitions = piTmp[0], iPlaces = piTmp[1];

  int *piPre = INTEGER(pre), *piPost = INTEGER(post);

  SEXP sexpTmp;

  int iTransition, iPlace, iTransitionPtr, iPlacePtr,
    iTransition2, iTransitionPtr2;

  // Find out which elements of h are doubles and which functions
  SEXP sexpFunction;
  PROTECT(sexpFunction = allocVector(VECSXP, iTransitions));
  double *pdH = (double *) R_alloc(iTransitions, sizeof(double));
  DL_FUNC *pCFunction = (DL_FUNC *) R_alloc(iTransitions, sizeof(DL_FUNC *));
  int *piHzType = (int *) R_alloc(iTransitions, sizeof(int));
  for (iTransition = 0; iTransition < iTransitions; iTransition++) {
    if (inherits(sexpTmp = VECTOR_ELT(h, iTransition), "NativeSymbol")) {
      pCFunction[iTransition] = (void *) R_ExternalPtrAddr(sexpTmp);
      piHzType[iTransition] = HZ_CFUNCTION;    
    } else if (isNumeric(sexpTmp)){
      pdH[iTransition] = REAL(sexpTmp)[0];
      piHzType[iTransition] = HZ_DOUBLE;
    } else  if (isFunction(sexpTmp)) {
      SET_VECTOR_ELT(sexpFunction, iTransition, lang1(sexpTmp));
      piHzType[iTransition] = HZ_RFUNCTION;
    } else {
      error("Unrecongnized transition function type\n");
    }
  }

  // Setup Matrix S
  int *piS = (int *) R_alloc(iTransitions * iPlaces, sizeof(int));

  // Position of non zero cells in pre per transition
  int *piPreNZxRow = (int *) R_alloc(iTransitions * iPlaces, sizeof(int));

  // Totals of non zero cells in pre per transition
  int *piPreNZxRowTot = (int *) R_alloc(iTransitions, sizeof(int));

  // Position of non zero cells in S per transition
  int *piSNZxRow = (int *) R_alloc(iTransitions * iPlaces, sizeof(int));

  // Totals of non zero cells in S per transition
  int *piSNZxRowTot = (int *) R_alloc(iTransitions, sizeof(int));

  for (iTransition = 0; iTransition < iTransitions; iTransition++) {
    int iPreNZxRow_col = 0;
    int iSNZxRow_col = 0;
    for (iPlace = 0; iPlace < iPlaces; iPlace++) {
      if (piPre[iTransition + iTransitions * iPlace]) {
	piPreNZxRow[iTransition + iTransitions * iPreNZxRow_col++] = iPlace;
      }
      if ((piS[iTransition + iTransitions * iPlace] = 
	   piPost[iTransition + iTransitions * iPlace] - piPre[iTransition + iTransitions * iPlace])) {
	piSNZxRow[iTransition + iTransitions * iSNZxRow_col++] = iPlace;
      }
    }
    piPreNZxRowTot[iTransition] = iPreNZxRow_col;
    piSNZxRowTot[iTransition] = iSNZxRow_col;
  }

  // Position of non zero cells in pre per place
  int *piPreNZxCol = (int *) R_alloc(iTransitions * iPlaces, sizeof(int));

  // Totals of non zero cells in pre per place
  int *piPreNZxColTot = (int *) R_alloc(iPlaces, sizeof(int));

  for (iPlace = 0; iPlace < iPlaces; iPlace++) {
    int iPreNZxCol_row = 0;
    for (iTransition = 0; iTransition < iTransitions; iTransition++) {
      if (piPre[iTransition + iTransitions * iPlace]) {
	piPreNZxCol[iPreNZxCol_row++ + iTransitions * iPlace] = iTransition;
      }
    }
    piPreNZxColTot[iPlace] = iPreNZxCol_row;
  }

  // Hazards that need to be recalculated if a given transition has happened
  int *piHazardsToModxRow = (int *) R_alloc((iTransitions + 1) * iTransitions, sizeof(int));

  // Totals of hazards to recalculate for each transition that has happened
  int *piHazardsToModxRowTot = (int *) R_alloc(iTransitions + 1, sizeof(int));
  
  for(iTransition = 0; iTransition < iTransitions; iTransition++) {
    int iHazardToCompTot = 0;
    for(iPlace = 0; iPlace < iPlaces; iPlace++) {
      if (piS[iTransition + iTransitions * iPlace]) {
	// Identify the transitions that need the hazards recalculated
	for(iTransitionPtr2 = 0; iTransitionPtr2 < piPreNZxColTot[iPlace]; iTransitionPtr2++) {
	  iTransition2 = piPreNZxCol[iTransitionPtr2 + iTransitions * iPlace];
	  int iAddThis = TRUE;
	  for (k = 0; k < iHazardToCompTot; k++) {
	    if(piHazardsToModxRow[iTransition + (iTransitions + 1) * k] == iTransition2) {
	      iAddThis = FALSE;
	      break;
	    }
	  }	    
	  if (iAddThis)
	    piHazardsToModxRow[iTransition + (iTransitions + 1) * iHazardToCompTot++] = iTransition2;
	}
      }
    }
    piHazardsToModxRowTot[iTransition] = iHazardToCompTot;
  }
  // For the initial calculation of all hazards...
  for(iTransition = 0; iTransition < iTransitions; iTransition++) {
    piHazardsToModxRow[iTransitions + (iTransitions + 1) * iTransition] = iTransition;
  }
  piHazardsToModxRowTot[iTransitions] = iTransitions;

  SEXP sexpCrntMarking;
  PROTECT(sexpCrntMarking = allocVector(REALSXP, iPlaces));
  double *pdCrntMarking = REAL(sexpCrntMarking);

  double dDelta = *REAL(delta);
  int iTotalSteps, iSectionSteps;
  double dT = 0;
  void *pCManage_time = 0;
  SEXP sexpRManage_time = 0;
  if (inherits(T, "NativeSymbol")) {
    pCManage_time = (void *) R_ExternalPtrAddr(T);
    dT = ((double(*)(double, double *)) pCManage_time)(-1, pdCrntMarking);
  } else if (isNumeric(T)){
    dT = *REAL(T);
  } else  if (isFunction(T)) {
    PROTECT(sexpRManage_time = lang1(T));

    defineVar(install("y"), sexpCrntMarking, rho);
    PROTECT(sexpTmp = allocVector(REALSXP, 1));
    *REAL(sexpTmp) = -1;
    defineVar(install("StartTime"), sexpTmp, rho);
    UNPROTECT_PTR(sexpTmp);
    dT = *REAL(VECTOR_ELT(eval(sexpRManage_time, rho),0));
  } else {
    error("Unrecognized time function type\n");
  }
  
  iTotalSteps = iSectionSteps = (int)(dT / dDelta) + 1;

  int iRun, iRuns = *INTEGER(runs);

  // Hazard vector
  double *pdTransitionHazard = (double *) R_alloc(iTransitions, sizeof(double));

  SEXP sexpRun;
  PROTECT(sexpRun = allocVector(VECSXP, iRuns));

  int iTotalUsedRandomNumbers = 0;

  // DiscTime Vector
  SEXP sexpD_time;
  PROTECT(sexpD_time = allocVector(REALSXP, iTotalSteps));
  double *pdDiscTime = REAL(sexpD_time);
  double dTmp = 0;
  for (k = 0; k < iTotalSteps; k++) {
    pdDiscTime[k] = dTmp;
    dTmp += dDelta;
  }

  SEXP sexpMarkingRowNames;
  PROTECT(sexpMarkingRowNames = allocVector(INTSXP, iTotalSteps));
  piTmp = INTEGER(sexpMarkingRowNames);
  for (k = 0; k < iTotalSteps; k++)
    piTmp[k] = k+1;

  double **ppdMarking = (double **) R_alloc(iPlaces, sizeof(double *));

  int iLevels = 7;
  int iGroups = pow(2, iLevels - 1);
  // Group holding the transitions that lie between boundaries
  int **ppiGroup = (int **) R_alloc(iGroups, sizeof(int *));
  // Number of transition each group has
  int *piGroupElm = (int *) R_alloc(iGroups, sizeof(int));
  // Total propensity hazard for each group
  int *piTotGroupTransitions = (int *) R_alloc(iGroups, sizeof(int));

  int *piTransitionInGroup = (int *) R_alloc(iTransitions, sizeof(int));
  int *piTransitionPositionInGroup = (int *) R_alloc(iTransitions, sizeof(int));

  int iGroup;
  for (iGroup = 0; iGroup < iGroups; iGroup++) {
    ppiGroup[iGroup] = (int *) R_alloc(iTransitions, sizeof(int));
  }

  node **ppnodeLevel = (node **) R_alloc(iLevels, sizeof(node *));
  int iLevel, iNode;
  int iNodesPerLevel = 1;
  for (iLevel = 0; iLevel < iLevels; iLevel++) {
    ppnodeLevel[iLevel] = (node *) R_alloc(iNodesPerLevel, sizeof(node));
    iNodesPerLevel *= 2;
  }
  node *pnodeRoot = &ppnodeLevel[0][0];
  pnodeRoot->parent = 0;
  node *pnodeGroup = ppnodeLevel[iLevels-1];

  iNodesPerLevel = 1;
  for (iLevel = 0; iLevel < iLevels; iLevel++) {
    for (iNode = 0; iNode < iNodesPerLevel; iNode++) {
      if (iLevel < iLevels-1) {
	ppnodeLevel[iLevel][iNode].iGroup = -1;
	ppnodeLevel[iLevel][iNode].left = &ppnodeLevel[iLevel+1][iNode*2];
	ppnodeLevel[iLevel][iNode].right = &ppnodeLevel[iLevel+1][iNode*2+1];
	ppnodeLevel[iLevel+1][iNode*2].parent = ppnodeLevel[iLevel+1][iNode*2+1].parent =
	  &ppnodeLevel[iLevel][iNode];
      } else {
	ppnodeLevel[iLevel][iNode].iGroup = iNode;
	ppnodeLevel[iLevel][iNode].left = ppnodeLevel[iLevel][iNode].right = 0;
      }
    }
    iNodesPerLevel *= 2;
  }

  double dNewHazard = 0;
  // Find minimum propensity
  double dMinHazard = DBL_MAX;
  for(iTransition = 0; iTransition < iTransitions; iTransition++) {
    switch(piHzType[iTransition]) {
    case HZ_DOUBLE:
      dNewHazard = pdH[iTransition];
      for(iPlacePtr = 0; iPlacePtr < piPreNZxRowTot[iTransition]; iPlacePtr++) {
	iPlace = piPreNZxRow[iTransition + iTransitions * iPlacePtr];
	for (k = 0; k < piPre[iTransition + iTransitions * iPlace]; k++)
	  dNewHazard *= (piPre[iTransition + iTransitions * iPlace] - k) / (double)(k+1);
      }
      if (dNewHazard > 0 && dNewHazard < dMinHazard)
	dMinHazard = dNewHazard;
      break;
    case HZ_CFUNCTION:	
      break;
    case HZ_RFUNCTION:
      break;
    }
  }

  GetRNGstate();
  for (iRun = 0; iRun < iRuns; iRun++) {

    int iUsedRandomNumbers = 0;
    Rprintf("%d ", iRun+1);

    // Totals for kind of transition vector
    SEXP sexpTotXTransition;
    PROTECT(sexpTotXTransition = allocVector(INTSXP, iTransitions));
    int *piTotTransitions = INTEGER(sexpTotXTransition);
  
    for(iTransition = 0; iTransition < iTransitions; iTransition++) {
      piTotTransitions[iTransition] = 0;
    }
  
    SEXP sexpMarking;
    PROTECT(sexpMarking = allocVector(VECSXP, iPlaces));
    //setAttrib(sexpMarking, R_NamesSymbol, place);
    //setAttrib(sexpMarking, R_RowNamesSymbol, sexpMarkingRowNames);
    //setAttrib(sexpMarking, R_ClassSymbol, ScalarString(mkChar("data.frame")));

    // Setup initial state
    double *pdTmp = REAL(M);
    for (iPlace = 0; iPlace < iPlaces; iPlace++) {
      SET_VECTOR_ELT(sexpMarking, iPlace, sexpTmp = allocVector(REALSXP, iTotalSteps));
      ppdMarking[iPlace] = REAL(sexpTmp);

      pdCrntMarking[iPlace] = pdTmp[iPlace];
    }
    
    for(iTransition = 0; iTransition < iTransitions; iTransition++) {
      pdTransitionHazard[iTransition] = 0;
      
      piTransitionInGroup[iTransition] = -1;
    }
    for (iGroup = 0; iGroup < iGroups; iGroup++) {
      piGroupElm[iGroup] = 0;
      piTotGroupTransitions[iGroup] = 0;
    }
    
    iNodesPerLevel = 1;
    for (iLevel = 0; iLevel < iLevels; iLevel++) {
      for (iNode = 0; iNode < iNodesPerLevel; iNode++) {
	ppnodeLevel[iLevel][iNode].dPartialAcumHazard = 0;
      }
      iNodesPerLevel *= 2;
    }
    node *pnode;
    
    double dTime = 0, dTarget = 0;
    int iTotTransitions = 0;

    int iStep = 0;
    int iInterruptCnt = 10000000;
    do {
      if (pCManage_time || sexpRManage_time) {
	double dEnd = 0;
	if (pCManage_time) {
	  dEnd = ((double(*)(double, double *)) pCManage_time)(dTarget, pdCrntMarking);
	} else {
	  defineVar(install("y"), sexpCrntMarking, rho);
	  PROTECT(sexpTmp = allocVector(REALSXP, 1));
	  *REAL(sexpTmp) = dTarget;
	  defineVar(install("StartTime"), sexpTmp, rho);
	  UNPROTECT_PTR(sexpTmp);

	  sexpTmp = eval(sexpRManage_time, rho);
	  dEnd = *REAL(VECTOR_ELT(sexpTmp,0));
	  for(iPlace = 0; iPlace < iPlaces; iPlace++) {
	    pdCrntMarking[iPlace] = REAL(VECTOR_ELT(sexpTmp,1))[iPlace];
	  }
	}
	iSectionSteps = (int)(dEnd / dDelta) + 1;
      }

      for(iPlace = 0; iPlace < iPlaces; iPlace++) {
	ppdMarking[iPlace][iStep] = pdCrntMarking[iPlace];
      }

      dTime = dTarget;
      dTarget += dDelta;
      
      // For the calculation of all hazards...
      int iLastTransition = iTransitions;
      
      do {
	// Get hazards only for the transitions associated with
	// places whose quantities changed in the last step.
	for(iTransitionPtr = 0; iTransitionPtr < piHazardsToModxRowTot[iLastTransition]; iTransitionPtr++) {
	  iTransition = piHazardsToModxRow[iLastTransition + (iTransitions + 1) * iTransitionPtr];
	  switch(piHzType[iTransition]) {
	  case HZ_DOUBLE:
	    dNewHazard = pdH[iTransition];
	    for(iPlacePtr = 0; iPlacePtr < piPreNZxRowTot[iTransition]; iPlacePtr++) {
	      iPlace = piPreNZxRow[iTransition + iTransitions * iPlacePtr];
	      for (k = 0; k < piPre[iTransition + iTransitions * iPlace]; k++)
		dNewHazard *= (pdCrntMarking[iPlace] - k) / (double)(k+1);
	    }
	    break;
	  case HZ_CFUNCTION:
	    dNewHazard = ((double(*)(double, double *)) pCFunction[iTransition])(dTime, pdCrntMarking);
	    break;
	  case HZ_RFUNCTION:
	    defineVar(install("y"), sexpCrntMarking, rho);
	    dNewHazard = REAL(eval(VECTOR_ELT(sexpFunction, iTransition), rho))[0];
	    break;
	  }

	  double dDeltaHazard;
	  frexp(dNewHazard/dMinHazard, &iGroup);
	  if (iGroup-- > 0) {
	    // Transition belongs to a group
	    if (iGroup == piTransitionInGroup[iTransition]) {
	      // Transitions will stay in same group as it was
	      dDeltaHazard = dNewHazard - pdTransitionHazard[iTransition];
	      pnode = &pnodeGroup[iGroup];
	      do {
		pnode->dPartialAcumHazard += dDeltaHazard;
	      } while ((pnode = pnode->parent));
	    } else if (piTransitionInGroup[iTransition] != -1) {
	      // Transition was in another group and needs to be moved to the new one
	      int iOldGroup = piTransitionInGroup[iTransition];
	      int iOldPositionInGroup = piTransitionPositionInGroup[iTransition];
	      dDeltaHazard = -pdTransitionHazard[iTransition];
	      pnode = &pnodeGroup[iOldGroup];
	      do {
		pnode->dPartialAcumHazard += dDeltaHazard;
	      } while ((pnode = pnode->parent));
	      piGroupElm[iOldGroup]--; // Old group will have one less element
	      // Now, piGroupElm[iOldGroup] is the index to last transition in group
	      if (iOldPositionInGroup != piGroupElm[iOldGroup]) {
		// Transition is not the last in group,
		// put the last transition in place of the one to be removed
		ppiGroup[iOldGroup][iOldPositionInGroup] = 
		  ppiGroup[iOldGroup][piGroupElm[iOldGroup]];
		// Update position of previous last transition in group
		piTransitionPositionInGroup[ppiGroup[iOldGroup][iOldPositionInGroup]] = 
		  iOldPositionInGroup;
	      }
	      dDeltaHazard = dNewHazard;
	      pnode = &pnodeGroup[iGroup];
	      do {
		pnode->dPartialAcumHazard += dDeltaHazard;
	      } while ((pnode = pnode->parent));
	      piTransitionInGroup[iTransition] = iGroup;
	      piTransitionPositionInGroup[iTransition] = piGroupElm[iGroup];
	      ppiGroup[iGroup][piGroupElm[iGroup]++] = iTransition;
	    } else if (piTransitionInGroup[iTransition] == -1) { // Transition was in no group
	      dDeltaHazard = dNewHazard;
	      pnode = &pnodeGroup[iGroup];
	      do {
		pnode->dPartialAcumHazard += dDeltaHazard;
	      } while ((pnode = pnode->parent));
	      piTransitionInGroup[iTransition] = iGroup;
	      piTransitionPositionInGroup[iTransition] = piGroupElm[iGroup];
	      ppiGroup[iGroup][piGroupElm[iGroup]++] = iTransition;
	    } else {
	    error("ERROR: Option not considered 1\n");
	    }
	  } else if (piTransitionInGroup[iTransition] != -1) {
	    // Transition will not belong to any group and needs to be removed from old
	    int iOldGroup = piTransitionInGroup[iTransition];
	    int iOldPositionInGroup = piTransitionPositionInGroup[iTransition];
	    dDeltaHazard = -pdTransitionHazard[iTransition];
	    pnode = &pnodeGroup[iOldGroup];
	    do {
	      pnode->dPartialAcumHazard += dDeltaHazard;
	    } while ((pnode = pnode->parent));
	    piGroupElm[iOldGroup]--; // Old group will have one less element
	    // Now, piGroupElm[iOldGroup] is the index to last transition in group
	    if (iOldPositionInGroup != piGroupElm[iOldGroup]) {
	      // Transition is not the last in group,
	      // put the last transition in place of the one to be removed
	      ppiGroup[iOldGroup][iOldPositionInGroup] = 
		ppiGroup[iOldGroup][piGroupElm[iOldGroup]];
	      // Update position of previous last transition in group
	      piTransitionPositionInGroup[ppiGroup[iOldGroup][iOldPositionInGroup]] = 
		iOldPositionInGroup;
	    }
	    piTransitionInGroup[iTransition] = -1;
	  }
	  pdTransitionHazard[iTransition] = dNewHazard;
	}
	
	// Get Time to transition
	dTime += exp_rand() / pnodeRoot->dPartialAcumHazard;
	iUsedRandomNumbers++;
	
	while (dTime >= dTarget) {
	  ++iStep;
	  // Update the state for the fixed incremented time.
	  for(iPlace = 0; iPlace < iPlaces; iPlace++)
	    ppdMarking[iPlace][iStep] = pdCrntMarking[iPlace];
	  if (iStep == iSectionSteps - 1)
	    goto EXIT_LOOP;

	  dTarget += dDelta;

	  // Force check if user interrupted
	  iInterruptCnt = 1;
	}
	if (! --iInterruptCnt) {
	  // Allow user interruption
	  R_CheckUserInterrupt();
	  iInterruptCnt = 10000000;
	}
	do {
	  // Find group containing firing transition
	  double dRnd = unif_rand() * pnodeRoot->dPartialAcumHazard;
	  iUsedRandomNumbers++;
	  pnode = pnodeRoot;
	  do {
	    if (dRnd < pnode->left->dPartialAcumHazard) {
	      pnode = pnode->left;
	    } else {
	      dRnd -= pnode->left->dPartialAcumHazard;
	      pnode = pnode->right;
	    }	      
	  } while (pnode->left);
	  // Next check is because
	  // once in a while it is generated a number that goes past
	  // the last group or selects a group with zero elements
	  // due to accumulated truncation errors.
	  // Discard this random number and try again.
	} while (piGroupElm[iGroup = pnode->iGroup] == 0);

	double dMaxInGroup = dMinHazard * pow(2, iGroup + 1);
	// Find transition in group
	while (1) {
	  if (! --iInterruptCnt) {
	    // Allow user interruption
	    R_CheckUserInterrupt();
	    iInterruptCnt = 10000000;
	  }
	  iTransitionPtr = (int) (unif_rand() * piGroupElm[iGroup]);
	  iUsedRandomNumbers++;
	  iTransition = ppiGroup[iGroup][iTransitionPtr];
	  iUsedRandomNumbers++;
	  if (pdTransitionHazard[iTransition] > unif_rand() * dMaxInGroup) {
	    piTotTransitions[iLastTransition = iTransition]++;
	    for(iPlacePtr = 0; iPlacePtr < piSNZxRowTot[iTransition]; iPlacePtr++) {
	      iPlace = piSNZxRow[iTransition + iTransitions * iPlacePtr];
	      
	      // Update the state
	      pdCrntMarking[iPlace] += piS[iTransition + iTransitions * iPlace];
	    }
	    break;
	  }
	}
	++iTotTransitions;
      } while (TRUE);
    EXIT_LOOP:;
      Rprintf(".");
    } while (iSectionSteps < iTotalSteps);
    iTotalUsedRandomNumbers += iUsedRandomNumbers;
    Rprintf("\t%d\t%d\t%d", iTotTransitions, iUsedRandomNumbers, iTotalUsedRandomNumbers);
#ifdef RB_SUBTIME
    c1 = clock();
    Rprintf ("\t To go: ");
    PrintfTime((double) (c1 - c0)/CLOCKS_PER_SEC/(iRun+1)*(iRuns-iRun-1));
#endif
    Rprintf ("\n");
    
    SEXP sexpTotTransitions;
    PROTECT(sexpTotTransitions = allocVector(INTSXP, 1));
    INTEGER(sexpTotTransitions)[0] = iTotTransitions;

    SEXP sexpThisRun;
    PROTECT(sexpThisRun = allocVector(VECSXP, 3));

    SET_VECTOR_ELT(sexpThisRun, 0, sexpMarking);
    UNPROTECT_PTR(sexpMarking);
    SET_VECTOR_ELT(sexpThisRun, 1, sexpTotXTransition);
    UNPROTECT_PTR(sexpTotXTransition);
    SET_VECTOR_ELT(sexpThisRun, 2, sexpTotTransitions);
    UNPROTECT_PTR(sexpTotTransitions);

    SEXP sexpNames;
    PROTECT(sexpNames = allocVector(VECSXP, 3));
    SET_VECTOR_ELT(sexpNames, 0, mkChar("M"));
    SET_VECTOR_ELT(sexpNames, 1, mkChar("transitions"));
    SET_VECTOR_ELT(sexpNames, 2, mkChar("tot.transitions"));
    setAttrib(sexpThisRun, R_NamesSymbol, sexpNames);
    UNPROTECT_PTR(sexpNames);

    SET_VECTOR_ELT(sexpRun, iRun, sexpThisRun);
    UNPROTECT_PTR(sexpThisRun);
  }
  PutRNGstate();

  SEXP sexpAns;
  PROTECT(sexpAns = allocVector(VECSXP, 4));
  SET_VECTOR_ELT(sexpAns, 0, place);
  SET_VECTOR_ELT(sexpAns, 1, transition);
  SET_VECTOR_ELT(sexpAns, 2, sexpD_time);
  UNPROTECT_PTR(sexpD_time);
  SET_VECTOR_ELT(sexpAns, 3, sexpRun);
  UNPROTECT_PTR(sexpRun);

  SEXP sexpNames;
  PROTECT(sexpNames = allocVector(VECSXP, 4));
  SET_VECTOR_ELT(sexpNames, 0, mkChar("place"));
  SET_VECTOR_ELT(sexpNames, 1, mkChar("transition"));
  SET_VECTOR_ELT(sexpNames, 2, mkChar("dt"));
  SET_VECTOR_ELT(sexpNames, 3, mkChar("run"));
  setAttrib(sexpAns, R_NamesSymbol, sexpNames);
  UNPROTECT_PTR(sexpNames);

#ifdef RB_TIME
  c1 = clock();
  double dCpuTime = (double) (c1 - c0)/CLOCKS_PER_SEC;
  Rprintf ("Elapsed CPU time: ");
  PrintfTime(dCpuTime);
  Rprintf ("\t(%fs)\n", dCpuTime);
#endif

  if (sexpRManage_time)
    UNPROTECT_PTR(sexpRManage_time);
  UNPROTECT_PTR(sexpFunction);
  UNPROTECT_PTR(sexpMarkingRowNames);
  UNPROTECT_PTR(sexpCrntMarking);
  UNPROTECT_PTR(sexpAns);
  return(sexpAns);
}
Example #22
0
GBMRESULT CGBM::iterate
(
    double *adF,
    double &dTrainError,
    double &dValidError,
    double &dOOBagImprove,
    int &cNodes,
    int cNumClasses,
    int cClassIdx
)
{
    GBMRESULT hr = GBM_OK;
    unsigned long i = 0;
    unsigned long cBagged = 0;
    int cIdxOff = cClassIdx * (cTrain + cValid);

 //   for(i=0; i < cTrain + cIdxOff; i++){ adF[i] = 0;}
    if(!fInitialized)
    {
        hr = GBM_FAIL;
        goto Error;
    }

    dTrainError = 0.0;
    dValidError = 0.0;
    dOOBagImprove = 0.0;

    vecpTermNodes.assign(2*cDepth+1,NULL);

    // randomly assign observations to the Bag

    if (cClassIdx == 0)
    {
        if (!IsPairwise())
        {
            // regular instance based training
            for(i=0; i<cTrain; i++) /* && (cBagged < cTotalInBag); i++) */
            {
                if(unif_rand()*(cTrain-i) < cTotalInBag-cBagged)
                {
                    afInBag[i] = true;
                    cBagged++;
                }
                else
                {
                    afInBag[i] = false;
                }
/*                if (cBagged >= cTotalInBag){
                    break; 		
                } */
            }
	    std::fill(afInBag + i, afInBag + cTrain, false);
        }
        else
        {
            // for pairwise training, sampling is per group
            // therefore, we will not have exactly cTotalInBag instances

            double dLastGroup = -1;
            bool chosen = false;
            unsigned int cBaggedGroups = 0;
            unsigned int cSeenGroups   = 0;
            unsigned int cTotalGroupsInBag = (unsigned long)(dBagFraction * cGroups);
            if (cTotalGroupsInBag <= 0)
            {
                cTotalGroupsInBag = 1;
            }
            for(i=0; i<cTrain; i++)
            {
                const double dGroup = pData->adMisc[i];
                if (dGroup != dLastGroup)
                {
                    if (cBaggedGroups >= cTotalGroupsInBag)
                    {
                        break;
                    }

                    // Group changed, make a new decision
                    chosen = (unif_rand()*(cGroups - cSeenGroups) < cTotalGroupsInBag - cBaggedGroups);
                    if (chosen)
                    {
                        cBaggedGroups++;
                    }
                    dLastGroup = dGroup;
                    cSeenGroups++;
                }
                if (chosen)
                {
                    afInBag[i] = true;
                    cBagged++;
                }
                else
                {
                    afInBag[i] = false;
                }
            }
            // the remainder is not in the bag
	    std::fill(afInBag + i, afInBag + cTrain, false);
        }
    }

#ifdef NOISY_DEBUG
    Rprintf("Compute working response\n");
#endif

    hr = pDist->ComputeWorkingResponse(pData->adY,
                                       pData->adMisc,
                                       pData->adOffset,
                                       adF,
                                       &adZ[0],
                                       pData->adWeight,
                                       afInBag,
                                       cTrain,
                                       cIdxOff);

    if(GBM_FAILED(hr))
    {
        goto Error;
    }

#ifdef NOISY_DEBUG
    Rprintf("Reset tree\n");
#endif
    hr = ptreeTemp->Reset();
#ifdef NOISY_DEBUG
    Rprintf("grow tree\n");
#endif

    hr = ptreeTemp->grow(&(adZ[cIdxOff]), pData, &(pData->adWeight[cIdxOff]),
                         &(adFadj[cIdxOff]), cTrain, cTotalInBag, dLambda, cDepth,
                         cMinObsInNode, afInBag, aiNodeAssign, aNodeSearch,
                         vecpTermNodes);

    if(GBM_FAILED(hr))
    {
        goto Error;
    }

#ifdef NOISY_DEBUG
    Rprintf("get node count\n");
#endif
    hr = ptreeTemp->GetNodeCount(cNodes);
    if(GBM_FAILED(hr))
    {
        goto Error;
    }

    // Now I have adF, adZ, and vecpTermNodes (new node assignments)
    // Fit the best constant within each terminal node
#ifdef NOISY_DEBUG
    Rprintf("fit best constant\n");
#endif

    hr = pDist->FitBestConstant(pData->adY,
                                pData->adMisc,
                                pData->adOffset,
                                pData->adWeight,
                                &adF[0],
                                &adZ[0],
                                aiNodeAssign,
                                cTrain,
                                vecpTermNodes,
                                (2*cNodes+1)/3, // number of terminal nodes
                                cMinObsInNode,
                                afInBag,
                                &adFadj[0],
                                cIdxOff);

    if(GBM_FAILED(hr))
    {
        goto Error;
    }

    // update training predictions
    // fill in missing nodes where N < cMinObsInNode
    hr = ptreeTemp->Adjust(aiNodeAssign,&(adFadj[cIdxOff]),cTrain,
                           vecpTermNodes,cMinObsInNode);
    if(GBM_FAILED(hr))
    {
        goto Error;
    }
    ptreeTemp->SetShrinkage(dLambda);

    if (cClassIdx == (cNumClasses - 1))
    {
        dOOBagImprove = pDist->BagImprovement(pData->adY,
                                              pData->adMisc,
                                              pData->adOffset,
                                              pData->adWeight,
                                              &adF[0],
                                              &adFadj[0],
                                              afInBag,
                                              dLambda,
                                              cTrain);
    }

    // update the training predictions
    for(i=0; i < cTrain; i++)
    {
        int iIdx = i + cIdxOff;
        adF[iIdx] += dLambda * adFadj[iIdx];
    }

    dTrainError = pDist->Deviance(pData->adY,
                                  pData->adMisc,
                                  pData->adOffset,
                                  pData->adWeight,
                                  adF,
                                  cTrain,
                                  cIdxOff);

    // update the validation predictions
    hr = ptreeTemp->PredictValid(pData,cValid,&(adFadj[cIdxOff]));

    for(i=cTrain; i < cTrain+cValid; i++)
    {
        adF[i + cIdxOff] += adFadj[i + cIdxOff];
    }

    if(pData->fHasOffset)
    {
        dValidError =
            pDist->Deviance(pData->adY,
                            pData->adMisc,
                            pData->adOffset,
                            pData->adWeight,
                            adF,
                            cValid,
                            cIdxOff + cTrain);
    }
    else
    {
        dValidError = pDist->Deviance(pData->adY,
                                      pData->adMisc,
                                      NULL,
                                      pData->adWeight,
                                      adF,
                                      cValid,
                                      cIdxOff + cTrain);
    }

Cleanup:
    return hr;
Error:
    goto Cleanup;
}
Example #23
0
/*  Define the R RNG for use from Fortran. */
void F77_SUB(rrand)(double *r) { *r = unif_rand(); }
Example #24
0
void
rcont2(int *nrow, int *ncol,
       /* vectors of row and column totals, and their sum ntotal: */
       int *nrowt, int *ncolt, int *ntotal,
       double *fact, int *jwork, int *matrix)
{
    int j, l, m, ia, ib, ic, jc, id, ie, ii, nll, nlm, nr_1, nc_1;
    double x, y, dummy, sumprb;
    Rboolean lsm, lsp;

    nr_1 = *nrow - 1;
    nc_1 = *ncol - 1;

    ib = 0; /* -Wall */

    /* Construct random matrix */
    for (j = 0; j < nc_1; ++j)
	jwork[j] = ncolt[j];

    jc = *ntotal;

    for (l = 0; l < nr_1; ++l) { /* -----  matrix[ l, * ] ----- */
	ia = nrowt[l];
	ic = jc;
	jc -= ia;/* = n_tot - sum(nr[0:l]) */

	for (m = 0; m < nc_1; ++m) {
	    id = jwork[m];
	    ie = ic;
	    ic -= id;
	    ib = ie - ia;
	    ii = ib - id;

	    if (ie == 0) { /* Row [l,] is full, fill rest with zero entries */
		for (j = m; j < nc_1; ++j)
		    matrix[l + j * *nrow] = 0;
		ia = 0;
		break;
	    }

	    /* Generate pseudo-random number */
	    dummy = unif_rand();

	    do {/* Outer Loop */

		/* Compute conditional expected value of MATRIX(L, M) */

		nlm = (int)(ia * (id / (double) ie) + 0.5);
		x = exp(fact[ia] + fact[ib] + fact[ic] + fact[id]
			- fact[ie] - fact[nlm]
			- fact[id - nlm] - fact[ia - nlm] - fact[ii + nlm]);
		if (x >= dummy)
		    break;
		if (x == 0.)/* MM: I haven't seen this anymore */
		    error(_("rcont2 [%d,%d]: exp underflow to 0; algorithm failure"), l, m);

		sumprb = x;
		y = x;
		nll = nlm;

		do {
		    /* Increment entry in row L, column M */
		    j = (int)((id - nlm) * (double)(ia - nlm));
		    lsp = (j == 0);
		    if (!lsp) {
			++nlm;
			x = x * j / ((double) nlm * (ii + nlm));
			sumprb += x;
			if (sumprb >= dummy)
			    goto L160;
		    }

		    do {
			R_CheckUserInterrupt();

			/* Decrement entry in row L, column M */
			j = (int)(nll * (double)(ii + nll));
			lsm = (j == 0);
			if (!lsm) {
			    --nll;
			    y = y * j / ((double) (id - nll) * (ia - nll));
			    sumprb += y;
			    if (sumprb >= dummy) {
				nlm = nll;
				goto L160;
			    }
			    /* else */
			    if (!lsp)
				break;/* to while (!lsp) */
			}
		    } while (!lsm);

		} while (!lsp);

		dummy = sumprb * unif_rand();

	    } while (1);

L160:
	    matrix[l + m * *nrow] = nlm;
	    ia -= nlm;
	    jwork[m] -= nlm;
	}
	matrix[l + nc_1 * *nrow] = ia;/* last column in row l */
    }

    /* Compute entries in last row of MATRIX */
    for (m = 0; m < nc_1; ++m)
	matrix[nr_1 + m * *nrow] = jwork[m];

    matrix[nr_1 + nc_1 * *nrow] = ib - matrix[nr_1 + (nc_1-1) * *nrow];

    return;
}
Example #25
0
/* Cross-Validation-routine from svm-train */
void do_cross_validation(struct svm_problem *prob,
			 struct svm_parameter *param,
			 int nr_fold,
			 double* cresults,
			 double* ctotal1,
			 double* ctotal2)
{
	int i;
	int total_correct = 0;
	double total_error = 0;
	double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;

	/* random shuffle */
	GetRNGstate();
	for(i=0; i<prob->l; i++)
	{
	        int j = i+((int) (unif_rand() * (prob->l-i)))%(prob->l-i);
		struct svm_node *tx;
		double ty;
			
		tx = prob->x[i];
		prob->x[i] = prob->x[j];
		prob->x[j] = tx;

		ty = prob->y[i];
		prob->y[i] = prob->y[j];
		prob->y[j] = ty;
	}
	PutRNGstate();

	for(i=0; i<nr_fold; i++)
	{
		int begin = i*prob->l/nr_fold;
		int end = (i+1)*prob->l/nr_fold;
		int j,k;
		struct svm_problem subprob;

		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);
			
		k=0;
		for(j = 0; j < begin; j++)
		{
			subprob.x[k] = prob->x[j];
			subprob.y[k] = prob->y[j];
			++k;
		}
		for(j = end; j<prob->l; j++)
		{
			subprob.x[k] = prob->x[j];
			subprob.y[k] = prob->y[j];
			++k;
		}

		if(param->svm_type == EPSILON_SVR ||
		   param->svm_type == NU_SVR)
		{
			struct svm_model *submodel = svm_train(&subprob,param);
			double error = 0;
			for(j=begin;j<end;j++)
			{
				double v = svm_predict(submodel,prob->x[j]);
				double y = prob->y[j];
				error += (v-y)*(v-y);
				sumv += v;
				sumy += y;
				sumvv += v*v;
				sumyy += y*y;
				sumvy += v*y;
			}
			svm_free_and_destroy_model(&submodel);
			/* printf("Mean squared error = %g\n",
			   error/(end-begin)); */
			cresults[i] = error/(end-begin);
			total_error += error;			
		}
		else
		{
			struct svm_model *submodel = svm_train(&subprob,param);
			int correct = 0;
			for(j=begin;j<end;j++)
			{
				double v = svm_predict(submodel,prob->x[j]);
				if(v == prob->y[j])
					++correct;
			}
			svm_free_and_destroy_model(&submodel);
			/* printf("Accuracy = %g%% (%d/%d)\n", */
			/* 100.0*correct/(end-begin),correct,(end-begin)); */
			cresults[i] = 100.0*correct/(end-begin);
			total_correct += correct;
		}

		free(subprob.x);
		free(subprob.y);
	}
	
	if(param->svm_type == EPSILON_SVR || param->svm_type == NU_SVR)
	{
	    /* printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
	        printf("Cross Validation Squared correlation coefficient = %g\n",
	    	((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
	    	((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
	    	); */
	    *ctotal1 = total_error/prob->l;
	    *ctotal2 = ((prob->l * sumvy - sumv * sumy) *
			(prob->l * sumvy - sumv*sumy))  /
		       ((prob->l * sumvv - sumv * sumv) *
		        (prob->l * sumyy - sumy * sumy));
	}
	else
	    /* printf("Cross Validation Accuracy =
	       %g%%\n",100.0*total_correct/prob.l); */
	    *ctotal1 = 100.0 * total_correct / prob->l;
}
Example #26
0
size_t rewire_ex(unsigned short *incidence,size_t ncol, size_t nrow,size_t max_iter,size_t verbose,size_t MAXITER,unsigned int seed)
{


	set_rand(seed);

 	size_t i,j,kk,n,rand1,rand2,t=0;
	size_t e=0;
    //copy of the original incidence matrix
	size_t *from;
	size_t *to;
	size_t a,b,c,d;


	for(i=0;i<nrow;i++)
		for(j=0;j<ncol;j++)
			e+=incidence[i*ncol+j];


	e/=2;
	//initialization of score vector overwriting the original TO CHECK
	do from=(size_t*)calloc(e,sizeof(size_t)); while(from==NULL);
	do to=(size_t*)calloc(e,sizeof(size_t));   while(to==NULL);
	kk=0;
	for(i=0;i<nrow;++i)
		for(j=0;j<i;++j)
			if(incidence[i*ncol+j]==1)
				{
					from[kk]=i;
					to[kk]=j;
          kk++;
        }
	time_t  tin,tfin;
  tin = time (NULL);

 	//GetRNGstate();
	for(n=0;n<max_iter;t++)
		{
			if(verbose==1)
				loadBar( n,  max_iter, 100,  50);
		//random rewiring
		  rand1=(size_t) (unif_rand()*e);
      do rand2=(size_t) (unif_rand()*e); while (rand1==rand2);
      a=from[rand1];
      c=from[rand2];
      b=to[rand1];
      d=to[rand2];
	if(t>MAXITER)
			{
				tfin = time (NULL);
 				if(verbose==1)
    		printf("DONE in %d seconds \n",-(tin-tfin));
				//PutRNGstate();
				return (-1);

			}
		//  printf("%d %d %d %d %d %d %d %d %d %d\n ",rand1,rand2,a+1,b+1,c+1,d+1,incidence[a*ncol+d],incidence[c*ncol+b],incidence[a*ncol+c],incidence[d*ncol+b]);
     if(a!=c && b!=d&&  a!=d && c!=b &&
           (	(incidence[a*ncol+d]==0 && incidence[c*ncol+b]==0  ) ||
            (incidence[a*ncol+c]==0 && incidence[d*ncol+b]==0  ) ))
        {
            if(incidence[a*ncol+d]==0 && incidence[c*ncol+b]==0 && incidence[a*ncol+c]==0 && incidence[d*ncol+b]==0 )
            {
                if(unif_rand()>=0)
                {
                    incidence[a*ncol+d]=1;incidence[d*ncol+a]=1;
                    incidence[c*ncol+b]=1;incidence[b*ncol+c]=1;
                    incidence[a*ncol+b]=0;incidence[b*ncol+a]=0;
                    incidence[c*ncol+d]=0;incidence[d*ncol+c]=0;
                    to[rand1]=d;
                    to[rand2]=b;
                    n++;

                }
                else
                {
                    incidence[a*ncol+c]=1;incidence[c*ncol+a]=1;
                    incidence[d*ncol+b]=1;incidence[b*ncol+d]=1;
                    incidence[a*ncol+b]=0;incidence[b*ncol+a]=0;
                    incidence[c*ncol+d]=0;incidence[d*ncol+c]=0;
                    //	from[rand1]=d;
                    to[rand1]=c;
                    from[rand2]=b;
                    to[rand2]=d;

                    n++;

                }
            }
            else
                if(incidence[a*ncol+d]==0 && incidence[c*ncol+b]==0)
                {
                    incidence[a*ncol+d]=1;incidence[d*ncol+a]=1;
                    incidence[c*ncol+b]=1;incidence[b*ncol+c]=1;
                    incidence[a*ncol+b]=0;incidence[b*ncol+a]=0;
                    incidence[c*ncol+d]=0;incidence[d*ncol+c]=0;
                    to[rand1]=d;
                    to[rand2]=b;
                    n++;
                }
                else
                {
                    incidence[a*ncol+c]=1;incidence[c*ncol+a]=1;
                    incidence[d*ncol+b]=1;incidence[b*ncol+d]=1;
                    incidence[a*ncol+b]=0;incidence[b*ncol+a]=0;
                    incidence[c*ncol+d]=0;incidence[d*ncol+c]=0;
                    to[rand1]=c;
                    from[rand2]=b;
                    to[rand2]=d;
                    n++;
                }
        }
    }
	tfin = time (NULL);
 	if(verbose==1)
    printf("DONE in %d seconds \n",-(tin-tfin));
	//PutRNGstate();

	return 0;


}
Example #27
0
void classForest(int *mdim, int *ntest, int *nclass, int *maxcat,
        int *nrnodes, int *ntree, double *x, double *xbestsplit,
        double *pid, double *cutoff, double *countts, int *treemap,
        int *nodestatus, int *cat, int *nodeclass, int *jts,
        int *jet, int *bestvar, int *node, int *treeSize,
        int *keepPred, int *prox, double *proxMat, int *nodes) {
    int j, n, n1, n2, idxNodes, offset1, offset2, *junk, ntie;
    double crit, cmax;
    
    zeroDouble(countts, *nclass * *ntest);
    idxNodes = 0;
    offset1 = 0;
    offset2 = 0;
    junk = NULL;

    // Rprintf("nclass %d\n", *nclass);
    for (j = 0; j < *ntree; ++j) {
        // Rprintf("pCT nclass %d \n", *nclass);
        /* predict by the j-th tree */
		// Rprintf( "#ntree: %d, idxNodes: %d\n", j, idxNodes );
        predictClassTree(x, *ntest, *mdim, nrnodes, treemap + 2*idxNodes,
                nodestatus + idxNodes, xbestsplit + idxNodes,
                bestvar + idxNodes, nodeclass + idxNodes,
                treeSize[j], cat, *nclass,
                jts + offset1, node + offset2, *maxcat);

		//// original code
		//predictClassTree(x, *ntest, *mdim, treemap + 2*idxNodes,
		//	nodestatus + idxNodes, xbestsplit + idxNodes,
		//	bestvar + idxNodes, nodeclass + idxNodes,
		//	treeSize[j], cat, *nclass,
		//	jts + offset1, node + offset2, *maxcat);
        
        /* accumulate votes: */
        for (n = 0; n < *ntest; ++n) {
            countts[jts[n + offset1] - 1 + n * *nclass] += 1.0;
        }
        
        /* if desired, do proximities for this round */
        if (*prox) computeProximity(proxMat, 0, node + offset2, junk, junk,
                *ntest);
        idxNodes += *nrnodes;
        if (*keepPred) offset1 += *ntest;
        if (*nodes)    offset2 += *ntest;
    }
    
    //Rprintf("ntest %d\n", *ntest);
    /* Aggregated prediction is the class with the maximum votes/cutoff */
    for (n = 0; n < *ntest; ++n) {
        //Rprintf("Ap: ntest %d\n", *ntest);
        cmax = 0.0;
        ntie = 1;
        for (j = 0; j < *nclass; ++j) {
            crit = (countts[j + n * *nclass] / *ntree) / cutoff[j];
            if (crit > cmax) {
                jet[n] = j + 1;
                cmax = crit;
            }
            /* Break ties at random: */
            if (crit == cmax) {
                ntie++;
                if (unif_rand() > 1.0 / ntie) jet[n] = j + 1;
            }
        }
    }
    
    //Rprintf("ntest %d\n", *ntest);
    /* if proximities requested, do the final adjustment
     * (division by number of trees) */
    
    //Rprintf("prox %d",*prox);
    if (*prox) {
        //Rprintf("prox: ntest %d\n", *ntest);
        for (n1 = 0; n1 < *ntest; ++n1) {
            for (n2 = n1 + 1; n2 < *ntest; ++n2) {
                proxMat[n1 + n2 * *ntest] /= *ntree;
                proxMat[n2 + n1 * *ntest] = proxMat[n1 + n2 * *ntest];
            }
            proxMat[n1 + n1 * *ntest] = 1.0;
        }
    }
    //Rprintf("END ntest %d\n", *ntest);
    
}
Example #28
0
void regRF(double *x, double *y, int *xdim, int *sampsize,
	   int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp,
	   int *cat, int *maxcat, int *jprint, int *doProx, int *oobprox,
           int *biasCorr, double *yptr, double *errimp, double *impmat,
           double *impSD, double *prox, int *treeSize, int *nodestatus,
           int *lDaughter, int *rDaughter, double *avnode, int *mbest,
           double *upper, double *mse, int *keepf, int *replace,
           int *testdat, double *xts, int *nts, double *yts, int *labelts,
           double *yTestPred, double *proxts, double *msets, double *coef,
           int *nout, int *inbag) {
    /*************************************************************************
   Input:
   mdim=number of variables in data set
   nsample=number of cases

   nthsize=number of cases in a node below which the tree will not split,
   setting nthsize=5 generally gives good results.

   nTree=number of trees in run.  200-500 gives pretty good results

   mtry=number of variables to pick to split on at each node.  mdim/3
   seems to give genrally good performance, but it can be
   altered up or down

   imp=1 turns on variable importance.  This is computed for the
   mth variable as the percent rise in the test set mean sum-of-
   squared errors when the mth variable is randomly permuted.

  *************************************************************************/

    double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand,
	errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB;

    double *yb, *xtmp, *xb, *ytr, *ytree, *tgini, *coeffs;

    int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm,
        nsample, mdim, keepF, keepInbag;
    int *oobpair, varImp, localImp, *varUsed;

    int *in, *nind, *nodex, *nodexts, *probs;

    nsample = xdim[0];
    mdim = xdim[1];
    ntest = *nts;
    varImp = imp[0];
    localImp = imp[1];
    nPerm = imp[2];
    keepF = keepf[0];
    keepInbag = keepf[1];

    if (*jprint == 0) *jprint = *nTree + 1;

    yb         = (double *) S_alloc(*sampsize, sizeof(double));
    xb         = (double *) S_alloc(mdim * *sampsize, sizeof(double));
    ytr        = (double *) S_alloc(nsample, sizeof(double));
    xtmp       = (double *) S_alloc(nsample, sizeof(double));
    resOOB     = (double *) S_alloc(nsample, sizeof(double));
    coeffs     = (double *) S_alloc(*sampsize, sizeof(double));
  
    probs      = (int *) S_alloc(*sampsize, sizeof(int));
    in         = (int *) S_alloc(nsample, sizeof(int));
    nodex      = (int *) S_alloc(nsample, sizeof(int));
    varUsed    = (int *) S_alloc(mdim, sizeof(int));
    nind = *replace ? NULL : (int *) S_alloc(nsample, sizeof(int));

    if (*testdat) {
	ytree      = (double *) S_alloc(ntest, sizeof(double));
	nodexts    = (int *) S_alloc(ntest, sizeof(int));
    }
    oobpair = (*doProx && *oobprox) ?
	(int *) S_alloc(nsample * nsample, sizeof(int)) : NULL;

    /* If variable importance is requested, tgini points to the second
       "column" of errimp, otherwise it's just the same as errimp. */
    tgini = varImp ? errimp + mdim : errimp;

    averrb = 0.0;
    meanY = 0.0;
    varY = 0.0;

    zeroDouble(yptr, nsample);
    zeroInt(nout, nsample);
    for (n = 0; n < nsample; ++n) {
	varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1);
	meanY = (n * meanY + y[n]) / (n + 1);
    }
    varY /= nsample;

    varYts = 0.0;
    meanYts = 0.0;
    if (*testdat) {
	for (n = 0; n < ntest; ++n) {
	    varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1);
	    meanYts = (n * meanYts + yts[n]) / (n + 1);
	}
	varYts /= ntest;
    }

    if (*doProx) {
        zeroDouble(prox, nsample * nsample);
	if (*testdat) zeroDouble(proxts, ntest * (nsample + ntest));
    }

    if (varImp) {
        zeroDouble(errimp, mdim * 2);
	if (localImp) zeroDouble(impmat, nsample * mdim);
    } else {
        zeroDouble(errimp, mdim);
    }
    if (*labelts) zeroDouble(yTestPred, ntest);

    /* print header for running output */
    if (*jprint <= *nTree) {
	Rprintf("     |      Out-of-bag   ");
	if (*testdat) Rprintf("|       Test set    ");
	Rprintf("|\n");
	Rprintf("Tree |      MSE  %%Var(y) ");
	if (*testdat) Rprintf("|      MSE  %%Var(y) ");
	Rprintf("|\n");
    }
    GetRNGstate();
    /*************************************
     * Start the loop over trees.
     *************************************/
    for (j = 0; j < *nTree; ++j) {

    /* multinomial */
    /*unsigned int coeffs[*sampsize];*/
    /* for loop implementation */
    /*double probs[*sampsize];*/
    for (k = 0; k < *sampsize; ++k) {
        probs[k] = 1/(*sampsize);
    }

    ran_multinomial(*sampsize,100,probs,coeffs);

		idx = keepF ? j * *nrnodes : 0;
		zeroInt(in, nsample);
        zeroInt(varUsed, mdim);
        /* Draw a random sample for growing a tree. */
		if (*replace) { /* sampling with replacement */
			for (n = 0; n < *sampsize; ++n) {
				xrand = unif_rand();
				k = xrand * nsample;
				in[k] = 1;
				yb[n] = y[k];
				for(m = 0; m < mdim; ++m) {
					xb[m + n * mdim] = x[m + k * mdim];
				}
			}
		} else { /* sampling w/o replacement */
			for (n = 0; n < nsample; ++n) nind[n] = n;
			last = nsample - 1;
			for (n = 0; n < *sampsize; ++n) {
				ktmp = (int) (unif_rand() * (last+1));
                k = nind[ktmp];
                swapInt(nind[ktmp], nind[last]);
				last--;
				in[k] = 1;
				yb[n] = y[k];
				for(m = 0; m < mdim; ++m) {
					xb[m + n * mdim] = x[m + k * mdim];
				}
			}
		}
		if (keepInbag) {
			for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n];
		}
        /* grow the regression tree */
		regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx,
                upper + idx, avnode + idx, nodestatus + idx, *nrnodes,
                treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini,
                varUsed, coeffs);
        /* predict the OOB data with the current tree */
		/* ytr is the prediction on OOB data by the current tree */
		predictRegTree(x, nsample, mdim, lDaughter + idx,
                       rDaughter + idx, nodestatus + idx, ytr, upper + idx,
                       avnode + idx, mbest + idx, treeSize[j], cat, *maxcat,
                       nodex);
		/* yptr is the aggregated prediction by all trees grown so far */
		errb = 0.0;
		ooberr = 0.0;
		jout = 0; /* jout is the number of cases that has been OOB so far */
		nOOB = 0; /* nOOB is the number of OOB samples for this tree */
		for (n = 0; n < nsample; ++n) {
			if (in[n] == 0) {
				nout[n]++;
                nOOB++;
				yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n];
				resOOB[n] = ytr[n] - y[n];
                ooberr += resOOB[n] * resOOB[n];
			}
            if (nout[n]) {
				jout++;
				errb += (y[n] - yptr[n]) * (y[n] - yptr[n]);
			}
		}
		errb /= jout;
		/* Do simple linear regression of y on yhat for bias correction. */
		if (*biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout);

		/* predict testset data with the current tree */
		if (*testdat) {
			predictRegTree(xts, ntest, mdim, lDaughter + idx,
						   rDaughter + idx, nodestatus + idx, ytree,
                           upper + idx, avnode + idx,
						   mbest + idx, treeSize[j], cat, *maxcat, nodexts);
			/* ytree is the prediction for test data by the current tree */
			/* yTestPred is the average prediction by all trees grown so far */
			errts = 0.0;
			for (n = 0; n < ntest; ++n) {
				yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1);
			}
            /* compute testset MSE */
			if (*labelts) {
				for (n = 0; n < ntest; ++n) {
					resid = *biasCorr ?
                        yts[n] - (coef[0] + coef[1]*yTestPred[n]) :
                        yts[n] - yTestPred[n];
					errts += resid * resid;
				}
				errts /= ntest;
			}
		}
        /* Print running output. */
		if ((j + 1) % *jprint == 0) {
			Rprintf("%4d |", j + 1);
			Rprintf(" %8.4g %8.2f ", errb, 100 * errb / varY);
			if(*labelts == 1) Rprintf("| %8.4g %8.2f ",
									  errts, 100.0 * errts / varYts);
			Rprintf("|\n");
		}
		mse[j] = errb;
		if (*labelts) msets[j] = errts;

		/*  DO PROXIMITIES */
		if (*doProx) {
			computeProximity(prox, *oobprox, nodex, in, oobpair, nsample);
			/* proximity for test data */
			if (*testdat) {
                /* In the next call, in and oobpair are not used. */
                computeProximity(proxts, 0, nodexts, in, oobpair, ntest);
				for (n = 0; n < ntest; ++n) {
					for (k = 0; k < nsample; ++k) {
						if (nodexts[n] == nodex[k]) {
							proxts[n + ntest * (k+ntest)] += 1.0;
						}
					}
				}
			}
		}

		/* Variable importance */
		if (varImp) {
			for (mr = 0; mr < mdim; ++mr) {
                if (varUsed[mr]) { /* Go ahead if the variable is used */
                    /* make a copy of the m-th variable into xtmp */
                    for (n = 0; n < nsample; ++n)
                        xtmp[n] = x[mr + n * mdim];
                    ooberrperm = 0.0;
                    for (k = 0; k < nPerm; ++k) {
                        permuteOOB(mr, x, in, nsample, mdim);
                        predictRegTree(x, nsample, mdim, lDaughter + idx,
                                       rDaughter + idx, nodestatus + idx, ytr,
                                       upper + idx, avnode + idx, mbest + idx,
                                       treeSize[j], cat, *maxcat, nodex);
                        for (n = 0; n < nsample; ++n) {
                            if (in[n] == 0) {
                                r = ytr[n] - y[n];
                                ooberrperm += r * r;
                                if (localImp) {
                                    impmat[mr + n * mdim] +=
                                        (r*r - resOOB[n]*resOOB[n]) / nPerm;
                                }
                            }
                        }
                    }
                    delta = (ooberrperm / nPerm - ooberr) / nOOB;
                    errimp[mr] += delta;
                    impSD[mr] += delta * delta;
                    /* copy original data back */
                    for (n = 0; n < nsample; ++n)
                        x[mr + n * mdim] = xtmp[n];
                }
            }
        }
    }
    PutRNGstate();
    /* end of tree iterations=======================================*/

    if (*biasCorr) {  /* bias correction for predicted values */
		for (n = 0; n < nsample; ++n) {
			if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n];
		}
		if (*testdat) {
			for (n = 0; n < ntest; ++n) {
				yTestPred[n] = coef[0] + coef[1] * yTestPred[n];
			}
		}
    }

    if (*doProx) {
		for (n = 0; n < nsample; ++n) {
			for (k = n + 1; k < nsample; ++k) {
                prox[nsample*k + n] /= *oobprox ?
                    (oobpair[nsample*k + n] > 0 ? oobpair[nsample*k + n] : 1) :
                    *nTree;
                prox[nsample * n + k] = prox[nsample * k + n];
            }
			prox[nsample * n + n] = 1.0;
        }
		if (*testdat) {
			for (n = 0; n < ntest; ++n)
				for (k = 0; k < ntest + nsample; ++k)
					proxts[ntest*k + n] /= *nTree;
		}
    }

    if (varImp) {
		for (m = 0; m < mdim; ++m) {
			errimp[m] = errimp[m] / *nTree;
			impSD[m] = sqrt( ((impSD[m] / *nTree) -
							  (errimp[m] * errimp[m])) / *nTree );
			if (localImp) {
                for (n = 0; n < nsample; ++n) {
                    impmat[m + n * mdim] /= nout[n];
                }
			}
        }
    }
    for (m = 0; m < mdim; ++m) tgini[m] /= *nTree;
}
Example #29
0
// [[register]]
SEXP mcmcbas(SEXP Y, SEXP X, SEXP Rweights, SEXP Rprobinit, SEXP Rmodeldim, SEXP incint, SEXP Ralpha,SEXP method, SEXP modelprior, SEXP Rupdate, SEXP Rbestmodel,  SEXP plocal,
             SEXP BURNIN_Iterations, SEXP MCMC_Iterations, SEXP LAMBDA, SEXP DELTA,
             SEXP Rparents)
{

  SEXP   RXwork = PROTECT(duplicate(X)), RYwork = PROTECT(duplicate(Y));
  int nProtected = 2, nUnique=0, newmodel=0;
  int nModels=LENGTH(Rmodeldim);

  //  Rprintf("Allocating Space for %d Models\n", nModels) ;
  SEXP ANS = PROTECT(allocVector(VECSXP, 15)); ++nProtected;
  SEXP ANS_names = PROTECT(allocVector(STRSXP, 15)); ++nProtected;
  SEXP Rprobs = PROTECT(duplicate(Rprobinit)); ++nProtected;
  SEXP MCMCprobs= PROTECT(duplicate(Rprobinit)); ++nProtected;
  SEXP R2 = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP shrinkage = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP modelspace = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
  SEXP modeldim =  PROTECT(duplicate(Rmodeldim)); ++nProtected;
  SEXP counts =  PROTECT(duplicate(Rmodeldim)); ++nProtected;
  SEXP beta = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
  SEXP se = PROTECT(allocVector(VECSXP, nModels)); ++nProtected;
  SEXP mse = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP modelprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP priorprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP logmarg = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP sampleprobs = PROTECT(allocVector(REALSXP, nModels)); ++nProtected;
  SEXP NumUnique = PROTECT(allocVector(INTSXP, 1)); ++nProtected;
  SEXP Rse_m = NULL, Rcoef_m = NULL, Rmodel_m;

  double *Xwork, *Ywork, *wts, *coefficients,*probs, shrinkage_m, *MCMC_probs,
    SSY, yty, mse_m, *se_m, MH=0.0, prior_m=1.0, *real_model,
    R2_m, RSquareFull, alpha, prone, denom, logmargy, postold, postnew;
  int nobs, p, k, i, j, m, n, l, pmodel, pmodel_old, *xdims, *model_m, *bestmodel, *varin, *varout;
  int mcurrent,  update, n_sure;
  double  mod, rem, problocal, *pigamma,  eps, *hyper_parameters;
  double *XtX, *XtY, *XtXwork, *XtYwork, *SSgam, *Cov, *priorCov, *marg_probs;
  double  lambda,  delta, one=1.0;

  int inc=1;
  int *model, *modelold, bit, *modelwork, old_loc, new_loc;
  //  char uplo[] = "U", trans[]="T";
  struct Var *vars;	/* Info about the model variables. */
  NODEPTR tree, branch;

  /* get dimsensions of all variables */


  nobs = LENGTH(Y);
  xdims = INTEGER(getAttrib(X,R_DimSymbol));
  p = xdims[1];
  k = LENGTH(modelprobs);
  update = INTEGER(Rupdate)[0];
  lambda=REAL(LAMBDA)[0];
  delta = REAL(DELTA)[0];
  //  Rprintf("delta %f lambda %f", delta, lambda);
  eps = DBL_EPSILON;
  problocal = REAL(plocal)[0];
  //  Rprintf("Update %i and prob.switch %f\n", update, problocal);
  /* Extract prior on models  */
  hyper_parameters = REAL(getListElement(modelprior,"hyper.parameters"));

  /*  Rprintf("n %d p %d \n", nobs, p);  */

  Ywork = REAL(RYwork);
  Xwork = REAL(RXwork);
  wts = REAL(Rweights);


 /* Allocate other variables.  */

  PrecomputeData(Xwork, Ywork, wts, &XtXwork, &XtYwork, &XtX, &XtY, &yty, &SSY, p, nobs);

  alpha = REAL(Ralpha)[0];

  vars = (struct Var *) R_alloc(p, sizeof(struct Var));
  probs =  REAL(Rprobs);
  n = sortvars(vars, probs, p);

  for (i =n; i <p; i++) REAL(MCMCprobs)[vars[i].index] = probs[vars[i].index];
  for (i =0; i <n; i++) REAL(MCMCprobs)[vars[i].index] = 0.0;
  MCMC_probs =  REAL(MCMCprobs);


  pigamma = vecalloc(p);
  real_model = vecalloc(n);
  marg_probs = vecalloc(n);
  modelold = ivecalloc(p);
  model = ivecalloc(p);
  modelwork= ivecalloc(p);
  varin= ivecalloc(p);
  varout= ivecalloc(p);


  /* create gamma gamma' matrix */
  SSgam  = (double *) R_alloc(n * n, sizeof(double));
  Cov  = (double *) R_alloc(n * n, sizeof(double));
  priorCov  = (double *) R_alloc(n * n, sizeof(double));
  for (j=0; j < n; j++) {
    for (i = 0; i < n; i++) {
      SSgam[j*n + i] = 0.0;
      Cov[j*n + i] = 0.0;
      priorCov[j*n + i] = 0.0;
      if (j == i)  priorCov[j*n + i] = lambda;
    }
    marg_probs[i] = 0.0;
  }





  RSquareFull = CalculateRSquareFull(XtY, XtX, XtXwork, XtYwork, Rcoef_m, Rse_m, p, nobs, yty, SSY);


  /* fill in the sure things */
  for (i = n, n_sure = 0; i < p; i++)  {
      model[vars[i].index] = (int) vars[i].prob;
      if (model[vars[i].index] == 1) ++n_sure;
  }


  GetRNGstate();
  tree = make_node(-1.0);

  /*  Rprintf("For m=0, Initialize Tree with initial Model\n");  */

  m = 0;
  bestmodel = INTEGER(Rbestmodel);

  INTEGER(modeldim)[m] = n_sure;

  /* Rprintf("Create Tree\n"); */
   branch = tree;

   for (i = 0; i< n; i++) {
      bit =  bestmodel[vars[i].index];
      if (bit == 1) {
	if (i < n-1 && branch->one == NULL)
	  branch->one = make_node(-1.0);
	if (i == n-1 && branch->one == NULL)
	  branch->one = make_node(0.0);
	branch = branch->one;
      }
      else {
	if (i < n-1 && branch->zero == NULL)
	  branch->zero = make_node(-1.0);
	if (i == n-1 && branch->zero == NULL)
	  branch->zero = make_node(0.0);
	branch = branch->zero;
      }

      model[vars[i].index] = bit;
      INTEGER(modeldim)[m]  += bit;
      branch->where = 0;
   }



    /*    Rprintf("Now get model specific calculations \n"); */

    pmodel = INTEGER(modeldim)[m];
    PROTECT(Rmodel_m = allocVector(INTSXP,pmodel));
    model_m = INTEGER(Rmodel_m);

      for (j = 0, l=0; j < p; j++) {
      	if (model[j] == 1) {
            model_m[l] = j;
           l +=1;}
      }

    SET_ELEMENT(modelspace, m, Rmodel_m);

    Rcoef_m = NEW_NUMERIC(pmodel); PROTECT(Rcoef_m);
    Rse_m = NEW_NUMERIC(pmodel);   PROTECT(Rse_m);
    coefficients = REAL(Rcoef_m);
    se_m = REAL(Rse_m);

      for (j=0, l=0; j < pmodel; j++) {
        XtYwork[j] = XtY[model_m[j]];
        for  ( i = 0; i < pmodel; i++) {
	         XtXwork[j*pmodel + i] = XtX[model_m[j]*p + model_m[i]];
	}
      }

    R2_m = 0.0;
    mse_m = yty;
    memcpy(coefficients, XtYwork, sizeof(double)*pmodel);
    cholreg(XtYwork, XtXwork, coefficients, se_m, &mse_m, pmodel, nobs);

    if (pmodel > 1)   R2_m = 1.0 - (mse_m * (double) ( nobs - pmodel))/SSY;

    SET_ELEMENT(beta, m, Rcoef_m);
    SET_ELEMENT(se, m, Rse_m);

    REAL(R2)[m] = R2_m;
    REAL(mse)[m] = mse_m;

    gexpectations(p, pmodel, nobs, R2_m, alpha, INTEGER(method)[0], RSquareFull, SSY, &logmargy, &shrinkage_m);

    REAL(sampleprobs)[m] = 1.0;
    REAL(logmarg)[m] = logmargy;
    REAL(shrinkage)[m] = shrinkage_m;
    prior_m  = compute_prior_probs(model,pmodel,p, modelprior);
    REAL(priorprobs)[m] = prior_m;

    UNPROTECT(3);


    old_loc = 0;
    pmodel_old = pmodel;
    nUnique=1;
    INTEGER(counts)[0] = 0;
    postold =  REAL(logmarg)[m] + log(REAL(priorprobs)[m]);
    memcpy(modelold, model, sizeof(int)*p);
  /*   Rprintf("model %d max logmarg %lf\n", m, REAL(logmarg)[m]); */

    /*  Rprintf("Now Sample the Rest of the Models \n");  */


  m = 0;

  while (nUnique < k && m < INTEGER(BURNIN_Iterations)[0]) {

    memcpy(model, modelold, sizeof(int)*p);
    pmodel =  n_sure;
    MH = 1.0;

    if (pmodel_old == n_sure || pmodel_old == n_sure + n){
	MH =  random_walk(model, vars,  n);
	MH =  1.0 - problocal;
    }
    else {
      if (unif_rand() < problocal) {
      // random
	MH =  random_switch(model, vars, n, pmodel_old, varin, varout );
      }
      else {
      // Randomw walk proposal flip bit//
	MH =  random_walk(model, vars,  n);
      }
    }

    branch = tree;
    newmodel= 0;

    for (i = 0; i< n; i++) {
      bit =  model[vars[i].index];

      if (bit == 1) {
	if (branch->one != NULL) branch = branch->one;
	else newmodel = 1;
	}
      else {
	if (branch->zero != NULL)  branch = branch->zero;
	else newmodel = 1.0;
      }
      pmodel  += bit;
    }

    if (pmodel  == n_sure || pmodel == n + n_sure)  MH = 1.0/(1.0 - problocal);

    if (newmodel == 1) {
      new_loc = nUnique;
      PROTECT(Rmodel_m = allocVector(INTSXP,pmodel));
      model_m = INTEGER(Rmodel_m);
      for (j = 0, l=0; j < p; j++) {
	if (model[j] == 1) {
	  model_m[l] = j;
	  l +=1;}
      }

      Rcoef_m = NEW_NUMERIC(pmodel); PROTECT(Rcoef_m);
      Rse_m = NEW_NUMERIC(pmodel);   PROTECT(Rse_m);
      coefficients = REAL(Rcoef_m);
      se_m = REAL(Rse_m);
      for (j=0, l=0; j < pmodel; j++) {
        XtYwork[j] = XtY[model_m[j]];
        for  ( i = 0; i < pmodel; i++) {
	  XtXwork[j*pmodel + i] = XtX[model_m[j]*p + model_m[i]];
	}
      }
      R2_m = 0.0;
      mse_m = yty;
      memcpy(coefficients, XtYwork, sizeof(double)*pmodel);
      cholreg(XtYwork, XtXwork, coefficients, se_m, &mse_m, pmodel, nobs);
      if (pmodel > 1)  R2_m = 1.0 - (mse_m * (double) ( nobs - pmodel))/SSY;
      prior_m = compute_prior_probs(model,pmodel,p, modelprior);
      gexpectations(p, pmodel, nobs, R2_m, alpha, INTEGER(method)[0], RSquareFull, SSY, &logmargy, &shrinkage_m);
      postnew = logmargy + log(prior_m);
    }
    else {
      new_loc = branch->where;
      postnew =  REAL(logmarg)[new_loc] + log(REAL(priorprobs)[new_loc]);
    }

    MH *= exp(postnew - postold);
    //    Rprintf("MH new %lf old %lf\n", postnew, postold);
    if (unif_rand() < MH) {

      if (newmodel == 1)  {
	new_loc = nUnique;
	insert_model_tree(tree, vars, n, model, nUnique);

	INTEGER(modeldim)[nUnique] = pmodel;
	SET_ELEMENT(modelspace, nUnique, Rmodel_m);

	SET_ELEMENT(beta, nUnique, Rcoef_m);
	SET_ELEMENT(se, nUnique, Rse_m);

	REAL(R2)[nUnique] = R2_m;
	REAL(mse)[nUnique] = mse_m;
	REAL(sampleprobs)[nUnique] = 1.0;
	REAL(logmarg)[nUnique] = logmargy;
	REAL(shrinkage)[nUnique] = shrinkage_m;
	REAL(priorprobs)[nUnique] = prior_m;
	UNPROTECT(3);
	++nUnique;
      }

      old_loc = new_loc;
      postold = postnew;
      pmodel_old = pmodel;
      memcpy(modelold, model, sizeof(int)*p);
    }
    else  {
      if (newmodel == 1) UNPROTECT(3);
    }

    INTEGER(counts)[old_loc] += 1;

    for (i = 0; i < n; i++) {
      /* store in opposite order so nth variable is first */
     real_model[n-1-i] = (double) modelold[vars[i].index];
     REAL(MCMCprobs)[vars[i].index] += (double) modelold[vars[i].index];
   }

   // Update SSgam = gamma gamma^T + SSgam
   F77_NAME(dsyr)("U", &n,  &one, &real_model[0], &inc,  &SSgam[0], &n);
   m++;
  }

 for (i = 0; i < n; i++) {
     REAL(MCMCprobs)[vars[i].index] /= (double) m;
 }
  //  Rprintf("\n%d \n", nUnique);


// Compute marginal probabilities
  mcurrent = nUnique;
  compute_modelprobs(modelprobs, logmarg, priorprobs,mcurrent);
  compute_margprobs(modelspace, modeldim, modelprobs, probs, mcurrent, p);



//  Now sample W/O Replacement
// Rprintf("NumUnique Models Accepted %d \n", nUnique);
 INTEGER(NumUnique)[0] = nUnique;


 if (nUnique < k) {
   update_probs(probs, vars, mcurrent, k, p);
   update_tree(modelspace, tree, modeldim, vars, k,p,n,mcurrent, modelwork);
  for (m = nUnique;  m < k; m++) {
    for (i = n; i < p; i++)  {
      INTEGER(modeldim)[m]  +=  model[vars[i].index];
    }

    branch = tree;

    for (i = 0; i< n; i++) {
      pigamma[i] = 1.0;
      bit =  withprob(branch->prob);

      /*    branch->done += 1; */

	if (bit == 1) {
	  for (j=0; j<=i; j++)  pigamma[j] *= branch->prob;
	  if (i < n-1 && branch->one == NULL)
	    branch->one = make_node(vars[i+1].prob);
          if (i == n-1 && branch->one == NULL)
	    branch->one = make_node(0.0);
	  branch = branch->one;
	}
        else {
	  for (j=0; j<=i; j++)  pigamma[j] *= (1.0 - branch->prob);
	  if (i < n-1 && branch->zero == NULL)
	    branch->zero = make_node(vars[i+1].prob);
          if (i == n-1 && branch->zero == NULL)
	    branch->zero = make_node(0.0);
	  branch = branch->zero;
	  }
	model[vars[i].index] = bit;
	INTEGER(modeldim)[m]  += bit;
    }

    REAL(sampleprobs)[m] = pigamma[0];
    pmodel = INTEGER(modeldim)[m];

    /* Now subtract off the visited probability mass. */
    branch=tree;
    for (i = 0; i < n; i++) {
      bit = model[vars[i].index];
      prone = branch->prob;
      if (bit == 1) prone -= pigamma[i];
      denom = 1.0 - pigamma[i];
      if (denom <= 0.0) {
	if (denom < 0.0) {
	  warning("neg denominator %le %le %le !!!\n", pigamma, denom, prone);
	  if (branch->prob < 0.0 && branch->prob < 1.0)
	    warning("non extreme %le\n", branch->prob);}
        denom = 0.0;}
      else {
	if  (prone <= 0)  prone = 0.0;
	if  (prone > denom)  {
          if (prone <= eps) prone = 0.0;
	  else prone = 1.0;
	  /* Rprintf("prone > 1 %le %le %le %le !!!\n", pigamma, denom, prone, eps);*/
	}
	else prone = prone/denom;
      }
      if (prone > 1.0 || prone < 0.0)
		Rprintf("%d %d Probability > 1!!! %le %le  %le %le \n",
		m, i, prone, branch->prob, denom, pigamma);


      /*      if (bit == 1)  pigamma /= (branch->prob);
	      else  pigamma /= (1.0 - branch->prob);
	      if (pigamma > 1.0) pigamma = 1.0; */
      branch->prob  = prone;
      if (bit == 1) branch = branch->one;
      else  branch = branch->zero;

      /*      Rprintf("%d %d \n",  branch->done, n - i); */
      /*      if (log((double) branch->done) < (n - i)*log(2.0)) {
	if (bit == 1) branch = branch->one;
	else  branch = branch->zero;
      }
      else {
	    branch->one = NULL;
	    branch->zero = NULL;
	    break; } */
    }

    /* Now get model specific calculations */

      PROTECT(Rmodel_m = allocVector(INTSXP, pmodel));
      model_m = INTEGER(Rmodel_m);

      for (j = 0, l=0; j < p; j++) {
	if (model[j] == 1) {
           model_m[l] = j;
           l +=1;}
      }


     SET_ELEMENT(modelspace, m, Rmodel_m);

      for (j=0, l=0; j < pmodel; j++) {
         XtYwork[j] = XtY[model_m[j]];
        for  ( i = 0; i < pmodel; i++) {
	 XtXwork[j*pmodel + i] = XtX[model_m[j]*p + model_m[i]];
	}

      }


      PROTECT(Rcoef_m = allocVector(REALSXP,pmodel));
      PROTECT(Rse_m = allocVector(REALSXP,pmodel));
      coefficients = REAL(Rcoef_m);
      se_m = REAL(Rse_m);

    mse_m = yty;
    memcpy(coefficients, XtYwork, sizeof(double)*pmodel);
    cholreg(XtYwork, XtXwork, coefficients, se_m, &mse_m, pmodel, nobs);


/*    olsreg(Ywork, Xwork, coefficients, se_m, &mse_m, &pmodel, &nobs, pivot,qraux,work,residuals,effects,v,betaols);   */
    if (pmodel > 1)  R2_m = 1.0 - (mse_m * (double) ( nobs - pmodel))/SSY;

    SET_ELEMENT(beta, m, Rcoef_m);
    SET_ELEMENT(se, m, Rse_m);

    REAL(R2)[m] = R2_m;
    REAL(mse)[m] = mse_m;

   gexpectations(p, pmodel, nobs, R2_m, alpha, INTEGER(method)[0],  RSquareFull, SSY, &logmargy, &shrinkage_m);
   REAL(logmarg)[m] = logmargy;
   REAL(shrinkage)[m] = shrinkage_m;
   REAL(priorprobs)[m] = compute_prior_probs(model,pmodel,p, modelprior);


    if (m > 1) {
      rem = modf((double) m/(double) update, &mod);
      if (rem  == 0.0) {
	mcurrent = m;
	compute_modelprobs(modelprobs, logmarg, priorprobs,mcurrent);
	compute_margprobs(modelspace, modeldim, modelprobs, probs, mcurrent, p);
	if (update_probs(probs, vars, mcurrent, k, p) == 1) {
//	  Rprintf("Updating Model Tree %d \n", m);
	  update_tree(modelspace, tree, modeldim, vars, k,p,n,mcurrent, modelwork);
	}

      }}
    UNPROTECT(3);
  }
 }

 compute_modelprobs(modelprobs, logmarg, priorprobs,k);
 compute_margprobs(modelspace, modeldim, modelprobs, probs, k, p);

  SET_VECTOR_ELT(ANS, 0, Rprobs);
  SET_STRING_ELT(ANS_names, 0, mkChar("probne0"));

  SET_VECTOR_ELT(ANS, 1, modelspace);
  SET_STRING_ELT(ANS_names, 1, mkChar("which"));

  SET_VECTOR_ELT(ANS, 2, logmarg);
  SET_STRING_ELT(ANS_names, 2, mkChar("logmarg"));

  SET_VECTOR_ELT(ANS, 3, modelprobs);
  SET_STRING_ELT(ANS_names, 3, mkChar("postprobs"));

  SET_VECTOR_ELT(ANS, 4, priorprobs);
  SET_STRING_ELT(ANS_names, 4, mkChar("priorprobs"));

  SET_VECTOR_ELT(ANS, 5,sampleprobs);
  SET_STRING_ELT(ANS_names, 5, mkChar("sampleprobs"));

  SET_VECTOR_ELT(ANS, 6, mse);
  SET_STRING_ELT(ANS_names, 6, mkChar("mse"));

  SET_VECTOR_ELT(ANS, 7, beta);
  SET_STRING_ELT(ANS_names, 7, mkChar("mle"));

  SET_VECTOR_ELT(ANS, 8, se);
  SET_STRING_ELT(ANS_names, 8, mkChar("mle.se"));

  SET_VECTOR_ELT(ANS, 9, shrinkage);
  SET_STRING_ELT(ANS_names, 9, mkChar("shrinkage"));

  SET_VECTOR_ELT(ANS, 10, modeldim);
  SET_STRING_ELT(ANS_names, 10, mkChar("size"));

  SET_VECTOR_ELT(ANS, 11, R2);
  SET_STRING_ELT(ANS_names, 11, mkChar("R2"));

  SET_VECTOR_ELT(ANS, 12, counts);
  SET_STRING_ELT(ANS_names, 12, mkChar("freq"));

  SET_VECTOR_ELT(ANS, 13, MCMCprobs);
  SET_STRING_ELT(ANS_names, 13, mkChar("probs.MCMC"));

  SET_VECTOR_ELT(ANS, 14, NumUnique);
  SET_STRING_ELT(ANS_names, 14, mkChar("n.Unique"));

  setAttrib(ANS, R_NamesSymbol, ANS_names);
  UNPROTECT(nProtected);
//  Rprintf("Return\n");
  PutRNGstate();

  return(ANS);
}
Example #30
0
size_t analysis(unsigned short *incidence,size_t ncol,size_t nrow,double *scores,size_t step,size_t max_iter,size_t verbose,unsigned int seed)
{


	set_rand(seed);
	size_t i,j,kk,n,rand1,rand2;
	size_t dim=max_iter+1;
	size_t *from;
  	size_t *to;
  	unsigned short *matrix;

	size_t a,b,c,d,e=0;
	size_t index=1;
  do matrix=(unsigned short *)calloc(nrow*ncol,sizeof(unsigned short)); while(matrix==NULL);
	for(i=0;i<nrow;++i)
	{
		
    for(j=0;j<ncol;++j)
			{
				matrix[i*ncol+j]=incidence[i*ncol+j];
				e+=incidence[i*ncol+j];
			}
  }
	//initialization of score vector overwriting the original
	//do	scores=(double*)calloc(dim,sizeof(double));	while(scores==NULL);
	for(i=0;i<dim;scores[i++]=0.0);
	scores[0]=1.0;
 	do	from=(size_t*)calloc(e,sizeof(size_t));	while(from==NULL);
	do	to=(size_t*)calloc(e,sizeof(size_t));	  while(to==NULL);
 	kk=0;
	for(i=0;i<nrow;++i)
		for(j=0;j<ncol;++j)
			if(matrix[i*ncol+j]==1)
				{
					from[kk]=i;
					to[kk]=j;
					kk++;
				}
	time_t  tin,tfin;
	tin = time (NULL);
	//GetRNGstate();
	for(n=0;n<max_iter;n++)
	{
		//random rewiring
    if(verbose==1)
  		loadBar( n,  max_iter, 100,  50);
		rand1=(size_t) (unif_rand()*e);
		do	rand2=(size_t) (unif_rand()*e);	while (rand1==rand2);
		a=from[rand1];
		c=from[rand2];
		b=to[rand1];
		d=to[rand2];
		//printf("%d %d %d %d %d %d %d %d %d\n",e,a,b,c,d,rand1,rand2,t,n);
		if(a!=c && d!=b && incidence[a*ncol+d]==0 && incidence[c*ncol+b]==0)
			{
				to[rand1]=d;
				to[rand2]=b;
				incidence[a*ncol+d]=incidence[c*ncol+b]=1;
				incidence[a*ncol+b]=incidence[c*ncol+d]=0;

			}

			if(n%step==0)
			(scores)[index++]=similarity(incidence,matrix, ncol,nrow, e);
	}
	tfin = time (NULL);
	//PutRNGstate();
	if(verbose==1)
    printf("DONE in %d seconds \n",-(tin-tfin));

	return (index-1);
}