示例#1
0
CpTable
make_cp_table(pNode me, double parent, int nsplit)
{
    CpTable entry;

    /*
     * Recursively fold the subtree rooted at `me` into the complexity table.
     *
     * For a leaf we start from the tail of the table.  For an interior node
     * we recurse into both children first:
     *   - both recursive calls are handed the same "parent" threshold
     *     (me->complexity), so they would return the same table position;
     *     only the value from the right-hand call is kept.
     *   - the left child is passed a split count of 0 so that the split at
     *     `me` is counted once (through the right child) instead of twice.
     */
    if (!me->leftson) {
	entry = cptable_tail;
    } else {
	make_cp_table(me->leftson, me->complexity, 0);
	entry = make_cp_table(me->rightson, me->complexity, nsplit + 1);
    }

    /*
     * Walk backwards through the table, crediting this node's risk and the
     * accumulated split count to every entry whose cp lies below `parent`.
     */
    for (; entry->cp < parent; entry = entry->back) {
	entry->risk += me->risk;
	entry->nsplit += nsplit;
    }

    /* First entry (going backwards) whose cp is not below `parent`. */
    return entry;
}
示例#2
0
文件: rpart.c 项目: cran/itree
/*
 * rpart -- top-level driver: set up the global state in `rp`, grow the tree,
 * build the complexity table and optionally run cross-validation.
 *
 * Arguments
 *   n        number of observations
 *   nvarx    number of predictor variables
 *   ncat     per-variable category count; 0 marks a variable that is sorted
 *            as continuous
 *   method   1-based index into func_table_objective (1..NUM_METHODS)
 *   penalty  1-based index into func_table_penalty (1..NUM_PENALTY)
 *            (ALG 4/2/2012: argument added for the penalty function)
 *   maxpri   stored in rp.maxpri; values below 1 are replaced by 1
 *   parms    passed through to the method's init routine and to xval()
 *   ymat     response matrix, row major, ny values per observation
 *   xmat     predictor matrix, column major, n values per variable
 *   missmat  n values per variable, column major; entries equal to 1 are
 *            treated as missing.  Overwritten in place with sort indices.
 *   cptable  head entry of the complexity table, filled in here
 *   tree     output: *tree receives the CALLOC'ed root node
 *   error    output: *error is pointed at a message on failure
 *   which    output: set to 1 for every observation before partitioning
 *   xvals    number of cross-validations; xval() is only run when > 1
 *   x_grp    passed through to xval()
 *   wt       observation weights
 *   opt      option vector read at these positions:
 *              [0] min_split   [1] min_node     [2] complexity (cp)
 *              [4] maxsur      [5] usesurrogate [6] sur_agree
 *              [7] exponent e, giving maxnode = 2^e - 1
 *              [8] improve scaling (1..NUM_IMPROVE)
 *              [9] split parameter alpha   [10] split parameter beta
 *   ny       number of response values per observation
 *   cost     stored in rp.vcost
 *
 * Returns 0 on success, 1 when method / penalty / improve scaling is out of
 * range, or the positive code returned by the method's init routine.
 */
int rpart(int n,         int nvarx,      Sint *ncat,     int method,
          int penalty,   int maxpri,
          double *parms, double *ymat,   FLOAT *xmat,
          Sint *missmat, struct cptable *cptable,
          struct node **tree,            char **error,   int *which,
          int xvals,     Sint *x_grp,    double *wt,     double *opt,
          int ny,        double *cost) {

    int i, k;
    int maxcat;
    int impscale;   /* scale 'improve' by root or parent */
    double temp;

    /*
    ** Initialize the splitting functions from the function table.
    ** This is what we seek to minimize/maximize by splitting the node.
    ** parent_objective returns the scaling factor for the 'improve' number
    ** calculated by rp_choose, in case it needs to differ from the 'risk'
    ** calculated by rp_eval.
    **
    ** The lower bound is checked as well as the upper one: method <= 0
    ** would otherwise index the table at -1 or below.
    */
    if (method >= 1 && method <= NUM_METHODS) {
        i = method - 1;
        rp_init   = func_table_objective[i].init_split;
        rp_choose = func_table_objective[i].choose_split;
        rp_eval   = func_table_objective[i].eval;
        rp_error  = func_table_objective[i].error;
        rp_parent_objective = func_table_objective[i].parent_objective;
        rp.num_y  = ny;
        rp.method_number = i;
    } else {
        *error = "Invalid value for 'method'";
        return(1);
    }

    /*
     * Initialize the penalty on new variables (same 1-based range check).
     */
    if (penalty >= 1 && penalty <= NUM_PENALTY) {
        i = penalty - 1;
        rp.penalty_number = i;
        rp_penalty = func_table_penalty[i].penalty_fcn;
    } else {
        *error = "Invalid value for 'penalty'";
        return(1);
    }

    /*
     * Initialize the improve function: the function that combines the
     * penalty and the splitting objective.  The main choice is whether to
     * scale the objective by the parent or by the root value.
     *
     * Note that rp.impscale_number keeps the 1-based value, while the
     * function table is indexed with impscale - 1.
     */
    impscale = (int) opt[8];
    if (impscale >= 1 && impscale <= NUM_IMPROVE) {
        rp.impscale_number = impscale;
        rp_improve = func_table_improve[impscale - 1].improve_fcn;
    } else {
        *error = "Invalid value for improve scaling - should be either 1 (parent) or 2 (root)";
        return(1);
    }

    /*
    ** set some other parameters
    */
    rp.collapse_is_possible = 1;  /* default; an init function may change it */
    rp.min_node  = (int) opt[1];
    rp.min_split = (int) opt[0];
    rp.complexity = opt[2];       /* cp parameter */
    rp.maxsur = (int) opt[4];
    rp.usesurrogate = (int) opt[5];
    rp.sur_agree = (int) opt[6];
    rp.maxnode = (int) pow((double)2.0, opt[7]) - 1;
    rp.nvar = nvarx;
    rp.numcat = ncat;
    rp.maxpri = maxpri;
    if (maxpri < 1) {
        rp.maxpri = 1;
    }
    rp.n = n;
    rp.which = which;
    rp.wt    = wt;
    rp.iscale = 0.0;
    rp.vcost  = cost;
    rp.max_depth = 32;
    int temp2[rp.max_depth];  /* initial vector for variables_used */

    /*
     * Split parameters.  The array holds two doubles, so the element size
     * must be sizeof(double); sizeof(double *) under-allocates wherever a
     * pointer is narrower than a double.
     */
    rp.splitparams = (double *) ALLOC(2, sizeof(double));
    rp.splitparams[0] = opt[9];   /* alpha */
    rp.splitparams[1] = opt[10];  /* beta */

    /*
    ** create the "ragged array" pointers to the matrix
    **   x and missmat are in column major order
    **   y is in row major order
    */
    rp.xdata = (FLOAT **) ALLOC(nvarx, sizeof(FLOAT *));
    for (i = 0; i < nvarx; i++) {
        rp.xdata[i] = &(xmat[i * n]);
    }
    rp.ydata = (double **) ALLOC(n, sizeof(double *));
    for (i = 0; i < n; i++) {
        rp.ydata[i] = &(ymat[i * rp.num_y]);
    }

    /*
    ** allocate some scratch
    */
    rp.tempvec = (int *) ALLOC(n, sizeof(int));
    rp.xtemp = (FLOAT *) ALLOC(n, sizeof(FLOAT));
    rp.ytemp = (double **) ALLOC(n, sizeof(double *));
    rp.wtemp = (double *) ALLOC(n, sizeof(double));

    /*
    ** create a matrix of sort indices, one for each continuous variable
    **   This sort is "once and for all".  The result is stored on top
    **   of the 'missmat' array.
    ** I don't have to sort the categoricals.
    */
    rp.sorts = (Sint **) ALLOC(nvarx, sizeof(Sint *));
    maxcat = 0;
    for (i = 0; i < nvarx; i++) {
        rp.sorts[i] = &(missmat[i * n]);
        for (k = 0; k < n; k++) {
            if (rp.sorts[i][k] == 1) {
                /* missing: tag with a negative index and neutralize x */
                rp.tempvec[k] = -(k + 1);
                rp.xdata[i][k] = 0;   /* weird numerics might destroy 'sort' */
            } else {
                rp.tempvec[k] = k;
            }
        }
        if (ncat[i] == 0) {
            mysort(0, n - 1, rp.xdata[i], rp.tempvec);
        } else if (ncat[i] > maxcat) {
            maxcat = ncat[i];
        }
        for (k = 0; k < n; k++) {
            rp.sorts[i][k] = rp.tempvec[k];
        }
    }

    /*
    ** And now the last of my scratch space.
    ** csplit/left/right share one block of 3*maxcat ints and lwt/rwt share
    ** one block of 2*maxcat doubles.
    */
    if (maxcat > 0) {
        rp.csplit = (int *) ALLOC(3 * maxcat, sizeof(int));
        rp.lwt    = (double *) ALLOC(2 * maxcat, sizeof(double));
        rp.left   = rp.csplit + maxcat;
        rp.right  = rp.left   + maxcat;
        rp.rwt    = rp.lwt    + maxcat;
    } else {
        rp.csplit = (int *) ALLOC(1, sizeof(int));
    }

    /*
    ** initialize the top node of the tree
    */
    temp = 0;
    for (i = 0; i < n; i++) {
        which[i] = 1;
        temp += wt[i];   /* total weight of all observations */
    }

    /* haven't split on anything so far... */
    for (i = 0; i < rp.max_depth; i++) {
        temp2[i] = -1;
    }

    /*
     * The root is allocated and filled in before the init return code is
     * tested, so *tree is set even when a positive code is returned.
     */
    i = rp_init(n, rp.ydata, maxcat, error, parms, &rp.num_resp, 1, wt);
    nodesize = sizeof(struct node) + (rp.num_resp - 2) * sizeof(double);
    *tree = (struct node *) CALLOC(1, nodesize);
    (*tree)->num_obs = n;
    (*tree)->sum_wt  = temp;

    if (i > 0) {
        return(i);
    }

    /*
     * Calculate the root's objective for scaling improve, which sets
     * rp.root_objective_scaling.  rp.dummy receives the other output of
     * the call.
     */
    rp.dummy = (double *) ALLOC(1, sizeof(double));
    (*rp_parent_objective)(n, rp.ydata, rp.dummy, &rp.root_objective_scaling, wt);

    (*rp_eval)(n, rp.ydata, (*tree)->response_est, &((*tree)->risk), wt);
    (*tree)->complexity = (*tree)->risk;
    rp.alpha = rp.complexity * (*tree)->risk;

    /*
    ** Do the basic tree
    */
    partition(1, (*tree), &temp, temp2);

    /* First entry of the complexity table describes the unsplit root. */
    cptable->cp = (*tree)->complexity;
    cptable->risk = (*tree)->risk;
    cptable->nsplit = 0;
    cptable->forward = 0;
    cptable->xrisk = 0;
    cptable->xstd = 0;
    rp.num_unique_cp = 1;

    if ((*tree)->rightson == 0) {
        return(0);   /* Nothing more needs to be done */
    }

    make_cp_list((*tree), (*tree)->complexity, cptable);
    make_cp_table((*tree), (*tree)->complexity, 0);

    if (xvals > 1 && (*tree)->rightson != 0) {
        xval(xvals, cptable, x_grp, maxcat, error, parms);
    }

    /*
    ** all done
    */
    return(0);
}