예제 #1
0
/*
 * surrogate -- build the list of surrogate splits for one node.
 *
 *   me      the node being processed; me->primary must already describe
 *           the primary split (var_num, spoint, csplit[])
 *   n1, n2  half-open range [n1, n2) of positions in ct.sorts[][] that is
 *           scanned (presumably the observations belonging to this node)
 *
 * Side effects:
 *   ct.tempvec[]       per-observation direction taken by the primary split
 *                      (0 when the primary variable is missing)
 *   me->lastsurrogate  LEFT or RIGHT, whichever side received more weight
 *                      from the primary split; 0 on an exact tie
 *   me->surrogate      reset, then filled through insert_split() with one
 *                      entry per variable accepted by choose_surg()
 */
void
surrogate(pNode me, int n1, int n2)
{
    int pos, obs, v, c;
    int pvar;                   /* variable used by the primary split */
    int goes_left;              /* direction code for x < cut (continuous) */
    int nlevels;
    int *direction;             /* the "y" handed to choose_surg() */
    int *cat_dir;               /* direction per category of the primary */
    int **order;
    double **x;
    double cut;                 /* primary cut first, then surrogate cuts */
    double gain;
    double agree;
    double wt_left, wt_right;   /* weight the primary sends each way */
    pSplit slot;

    direction = ct.tempvec;
    order = ct.sorts;
    x = ct.xdata;
    pvar = (me->primary)->var_num;

    /*
     * Step 1: record, for every observation in the range, where the
     * primary split sends it.  A negative entry in the sort index encodes
     * a missing value as -(obs + 1); such observations get direction 0.
     */
    if (ct.numcat[pvar] != 0) {
        /* categorical primary: csplit[] holds one direction per level */
        cat_dir = (me->primary)->csplit;
        for (pos = n1; pos < n2; pos++) {
            obs = order[pvar][pos];
            if (obs >= 0)
                direction[obs] = cat_dir[(int) x[pvar][obs] - 1];
            else
                direction[-(obs + 1)] = 0;
        }
    } else {
        /* continuous primary: csplit[0] is the direction of "x < cut" */
        cut = (me->primary)->spoint;
        goes_left = (me->primary)->csplit[0];
        for (pos = n1; pos < n2; pos++) {
            obs = order[pvar][pos];
            if (obs >= 0) {
                if (x[pvar][obs] < cut)
                    direction[obs] = goes_left;
                else
                    direction[obs] = -goes_left;
            } else
                direction[-(obs + 1)] = 0;
        }
    }

    /*
     * Step 2: total the case weights sent to each side; observations
     * with direction 0 contribute to neither total.
     */
    wt_left = 0;
    wt_right = 0;
    for (pos = n1; pos < n2; pos++) {
        obs = order[pvar][pos];
        if (obs < 0)
            obs = -(obs + 1);   /* decode the missing-value marker */
        if (direction[obs] == LEFT)
            wt_left += ct.wt[obs];
        else if (direction[obs] == RIGHT)
            wt_right += ct.wt[obs];
    }

    /* the fallback direction is the heavier side; 0 means no default */
    if (wt_left > wt_right)
        me->lastsurrogate = LEFT;
    else if (wt_left < wt_right)
        me->lastsurrogate = RIGHT;
    else
        me->lastsurrogate = 0;

    /*
     * Step 3: try every other variable as a surrogate and keep those
     * that choose_surg() reports as agreeing with the primary.
     */
    me->surrogate = (pSplit) NULL;
    for (v = 0; v < ct.nvar; v++) {
        if (v == pvar)
            continue;           /* the primary cannot be its own surrogate */
        nlevels = ct.numcat[v];

        choose_surg(n1, n2, direction, x[v], order[v], nlevels,
                    &gain, &cut, ct.csplit, wt_left, wt_right, &agree);

        if (agree <= 1e-10)     /* threshold was 0 */
            continue;           /* no better than the default direction */

        /* insert_split() places the candidate on the sorted list */
        slot = insert_split(&(me->surrogate), nlevels, gain, ct.maxsur);
        if (!slot)
            continue;           /* no slot was returned for this candidate */

        slot->improve = gain;
        slot->var_num = v;
        slot->count = 0;        /* corrected by nodesplit() */
        slot->adj = agree;
        if (ct.numcat[v] == 0) {
            slot->spoint = cut;
            slot->csplit[0] = ct.csplit[0];
        } else {
            for (c = 0; c < ct.numcat[v]; c++)
                slot->csplit[c] = ct.csplit[c];
        }
    }
}
예제 #2
0
파일: surrogate.c 프로젝트: cran/mvpart
/*
 * surrogate -- build the list of surrogate splits for one node.
 *
 *   me       the node being processed; me->primary must already describe
 *            the primary split (var_num, spoint, csplit[])
 *   nodenum  the node's number; an observation i is treated as a member
 *            of the node when rp.which[i] == nodenum
 *
 * Side effects:
 *   rp.tempvec[]       per-observation direction taken by the primary split
 *                      (0 when the primary variable is missing)
 *   me->lastsurrogate  RIGHT when the primary sends strictly more weight
 *                      right, otherwise LEFT (ties go LEFT)
 *   me->surrogate      reset, then filled through insert_split() with one
 *                      entry per variable accepted by choose_surg()
 */
void surrogate(struct node *me, int nodenum)
{
    int row, obs, v, c;
    int pvar;                   /* variable used by the primary split */
    int goes_left;              /* direction code for x < cut (continuous) */
    int nlevels;
    int *member;                /* node membership of each observation */
    int *direction;             /* the "y" handed to choose_surg() */
    int *cat_dir;               /* direction per category of the primary */
    Sint **order;
    FLOAT **x;
    FLOAT cut;                  /* primary cut first, then surrogate cuts */
    double gain;
    double agree;
    double wt_left, wt_right;   /* weight the primary sends each way */
    struct split *slot;

    member = rp.which;
    direction = rp.tempvec;
    order = rp.sorts;
    x = rp.xdata;
    pvar = (me->primary)->var_num;

    /*
     * Step 1: record where the primary split sends each observation.
     * A negative entry in the sort index marks a missing value.
     */
    if (rp.numcat[pvar] != 0) {
        /*
         * Categorical primary: everything here is indexed directly by
         * row, and csplit[] holds one direction per level.
         */
        cat_dir = (me->primary)->csplit;
        for (row = 0; row < rp.n; row++) {
            if (member[row] == nodenum) {
                if (order[pvar][row] >= 0)
                    direction[row] = cat_dir[(int) x[pvar][row] - 1];
                else
                    direction[row] = 0;
            }
        }
    } else {
        /*
         * Continuous primary: the data value is read at the sorted
         * position `row`, while membership and direction are indexed by
         * the observation number found in the sort index.  A missing
         * value is encoded as -(obs + 1) and is zeroed without a
         * membership check.
         */
        cut = (me->primary)->spoint;
        goes_left = (me->primary)->csplit[0];
        for (row = 0; row < rp.n; row++) {
            obs = order[pvar][row];
            if (obs < 0) {
                direction[-(obs + 1)] = 0;
                continue;
            }
            if (member[obs] != nodenum)
                continue;
            direction[obs] = (x[pvar][row] < cut) ? goes_left : -goes_left;
        }
    }

    /*
     * Step 2: total the case weights of the node's observations sent to
     * each side; direction 0 contributes to neither total.
     */
    wt_left = 0;
    wt_right = 0;
    for (row = 0; row < rp.n; row++) {
        if (member[row] != nodenum)
            continue;
        if (direction[row] == LEFT)
            wt_left += rp.wt[row];
        else if (direction[row] == RIGHT)
            wt_right += rp.wt[row];
    }

    /* the fallback direction is the heavier side */
    me->lastsurrogate = (wt_left < wt_right) ? RIGHT : LEFT;

    /*
     * Step 3: try every other variable as a surrogate and keep those
     * that choose_surg() reports as agreeing with the primary.
     */
    me->surrogate = 0;
    for (v = 0; v < rp.nvar; v++) {
        if (v == pvar)
            continue;           /* the primary cannot be its own surrogate */
        nlevels = rp.numcat[v];

        choose_surg(nodenum, direction, x[v], order[v], nlevels,
                    &gain, &cut, rp.csplit, wt_left, wt_right,
                    &agree);
        if (agree <= 0)
            continue;           /* no better than the default direction */

        /* insert_split() places the candidate on the sorted list */
        slot = insert_split(&(me->surrogate), nlevels, gain, rp.maxsur);
        if (slot == 0)
            continue;           /* no slot was returned for this candidate */

        slot->improve = gain;
        slot->var_num = v;
        slot->count = 0;        /* corrected by nodesplit() */
        slot->adj = agree;
        if (rp.numcat[v] == 0) {
            slot->spoint = cut;
            slot->csplit[0] = rp.csplit[0];
        } else {
            for (c = 0; c < rp.numcat[v]; c++)
                slot->csplit[c] = rp.csplit[c];
        }
    }
}