int partition(int nodenum, pNode splitnode, double *sumrisk, int n1, int n2, int minsize, int split_Rule, double alpha, int bucketnum, int bucketMax, double train_to_est_ratio) { pNode me; double tempcp; int i, j, k; double tempcp2; double left_risk, right_risk; int left_split, right_split; double twt, ttr; int nleft, nright; int n; int min_node_size = minsize; FILE* fptr; me = splitnode; n = n2 - n1; /* total number of observations */ me->id = nodenum; //#ifdef DEBUG //fptr=fopen("C:\\Users\\vikasr\\Documents\\debug_text.txt","w"); //fprintf(fptr,"test print\n"); //fclose(fptr); R_FlushConsole(); //Rprintf("test print\n"); //R_ShowMessage("R_show_message\n"); //#endif if (nodenum > 1) { twt = 0; ttr = 0; k = 0; for (i = n1; i < n2; i++) { j = ct.sorts[0][i]; /* any variable would do, use first */ if (j < 0) j = -(1 + j); /* if missing, value = -(1+ true index) */ ct.wtemp[k] = ct.wt[j]; ct.trtemp[k] = ct.treatment[j]; ct.ytemp[k] = ct.ydata[j]; twt += ct.wt[j]; ttr += ct.treatment[j] * ct.wt[j]; k++; } if (split_Rule == 1) { // tot (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, ct.propensity); } else if (split_Rule == 2) { // ct (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 3) { // fit (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 4) { //tstats (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 5) { // totD (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, ct.propensity); } else if (split_Rule == 6) { // CTD (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 7) { //fitD (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 8) { //tstatsD (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 9) { // user (temporarily set as CT) (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } else if (split_Rule == 10) { // userD (temporarily set as CTD) (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); }else if (split_Rule == 11) { // policy (temporarily set as CTD) (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); }else if (split_Rule == 12) { // policyD (temporarily set as CTD) (*ct_eval) (n, ct.ytemp, me->response_est, me->controlMean, me->treatMean, &(me->risk), ct.wtemp, ct.trtemp, ct.max_y, alpha, train_to_est_ratio); } me->num_obs = n; me->sum_wt = twt; me->sum_tr = ttr; tempcp = me->risk; if (tempcp > me->complexity) tempcp = me->complexity; } else tempcp = me->risk; /* * Can I quit now ? */ if (me->num_obs < ct.min_split || tempcp <= ct.alpha || nodenum > ct.maxnode) { me->complexity = ct.alpha; *sumrisk = me->risk; /* * make sure the split doesn't have random pointers to somewhere * i.e., don't trust that whoever allocated memory set it to zero */ me->leftson = (pNode) NULL; me->rightson = (pNode) NULL; me->primary = (pSplit) NULL; me->surrogate = (pSplit) NULL; return 0; } /* * Guess I have to do the split */ bsplit(me, n1, n2, min_node_size, split_Rule, alpha, bucketnum, bucketMax, train_to_est_ratio); if (!me->primary) { /* * This is rather rare -- but I couldn't find a split worth doing */ me->complexity = ct.alpha; me->leftson = (pNode) NULL; me->rightson = (pNode) NULL; me->primary = (pSplit) NULL; me->surrogate = (pSplit) NULL; *sumrisk = me->risk; return 0; } #ifdef DEBUG print_tree(me, 4); #endif if (ct.maxsur > 0) surrogate(me, n1, n2); else me->surrogate = (pSplit) NULL; nodesplit(me, nodenum, n1, n2, &nleft, &nright); /* * split the leftson */ me->leftson = (pNode) CALLOC(1, nodesize); (me->leftson)->parent = me; (me->leftson)->complexity = tempcp - ct.alpha; left_split = partition(2 * nodenum, me->leftson, &left_risk, n1, n1 + nleft, min_node_size, split_Rule, alpha, bucketnum, bucketMax, train_to_est_ratio); /* * Update my estimate of cp, and split the right son. */ tempcp = (me->risk - left_risk) / (left_split + 1); tempcp2 = (me->risk - (me->leftson)->risk); if (tempcp < tempcp2) tempcp = tempcp2; if (tempcp > me->complexity) tempcp = me->complexity; me->rightson = (pNode) CALLOC(1, nodesize); (me->rightson)->parent = me; (me->rightson)->complexity = tempcp - ct.alpha; right_split = partition(1 + 2 * nodenum, me->rightson, &right_risk, n1 + nleft, n1 + nleft + nright, min_node_size, split_Rule, alpha, bucketnum, bucketMax, train_to_est_ratio); /* * Now calculate my actual C.P., which depends on children nodes, and * on grandchildren who do not collapse before the children. * The calculation is done assuming that I am the top node of the * whole tree, an assumption to be fixed up later. */ tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); /* Who goes first -- minimum of tempcp, leftson, and rightson */ if ((me->rightson)->complexity > (me->leftson)->complexity) { if (tempcp > (me->leftson)->complexity) { /* leftson collapses first */ left_risk = (me->leftson)->risk; left_split = 0; tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (tempcp > (me->rightson)->complexity) { /* right one goes too */ right_risk = (me->rightson)->risk; right_split = 0; } } } else if (tempcp > (me->rightson)->complexity) { /* right hand child goes first */ right_split = 0; right_risk = (me->rightson)->risk; tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (tempcp > (me->leftson)->complexity) { /* left one goes too */ left_risk = (me->leftson)->risk; left_split = 0; } } me->complexity = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (me->complexity <= ct.alpha) { /* * All was in vain! This node doesn't split after all. */ free_tree(me, 0); *sumrisk = me->risk; for (i = n1; i < n2; i++) { j = ct.sorts[0][i]; if (j < 0) j = -(1 + j); ct.which[j] = nodenum; /* revert to the old nodenumber */ } return 0; /* return # of splits */ } else { *sumrisk = left_risk + right_risk; return left_split + right_split + 1; } }
int partition(int nodenum, pNode splitnode, double *sumrisk, int n1, int n2) { pNode me; double tempcp; int i, j, k; double tempcp2; double left_risk, right_risk; int left_split, right_split; double twt; int nleft, nright; int n; me = splitnode; n = n2 - n1; /* total number of observations */ if (nodenum > 1) { twt = 0; k = 0; for (i = n1; i < n2; i++) { j = rp.sorts[0][i]; /* any variable would do, use first */ if (j < 0) j = -(1 + j); /* if missing, value = -(1+ true index) */ rp.wtemp[k] = rp.wt[j]; rp.ytemp[k] = rp.ydata[j]; twt += rp.wt[j]; k++; } (*rp_eval) (n, rp.ytemp, me->response_est, &(me->risk), rp.wtemp); me->num_obs = n; me->sum_wt = twt; tempcp = me->risk; if (tempcp > me->complexity) tempcp = me->complexity; } else tempcp = me->risk; /* * Can I quit now ? */ if (me->num_obs < rp.min_split || tempcp <= rp.alpha || nodenum > rp.maxnode) { me->complexity = rp.alpha; *sumrisk = me->risk; /* * make sure the split doesn't have random pointers to somewhere * i.e., don't trust that whoever allocated memory set it to zero */ me->leftson = (pNode) NULL; me->rightson = (pNode) NULL; me->primary = (pSplit) NULL; me->surrogate = (pSplit) NULL; return 0; } /* * Guess I have to do the split */ bsplit(me, n1, n2); if (!me->primary) { /* * This is rather rare -- but I couldn't find a split worth doing */ me->complexity = rp.alpha; me->leftson = (pNode) NULL; me->rightson = (pNode) NULL; me->primary = (pSplit) NULL; me->surrogate = (pSplit) NULL; *sumrisk = me->risk; return 0; } #ifdef DEBUG print_tree(me, 2); #endif if (rp.maxsur > 0) surrogate(me, n1, n2); else me->surrogate = (pSplit) NULL; nodesplit(me, nodenum, n1, n2, &nleft, &nright); /* * split the leftson */ me->leftson = (pNode) CALLOC(1, nodesize); (me->leftson)->complexity = tempcp - rp.alpha; left_split = partition(2 * nodenum, me->leftson, &left_risk, n1, n1 + nleft); /* * Update my estimate of cp, and split the right son. */ tempcp = (me->risk - left_risk) / (left_split + 1); tempcp2 = (me->risk - (me->leftson)->risk); if (tempcp < tempcp2) tempcp = tempcp2; if (tempcp > me->complexity) tempcp = me->complexity; me->rightson = (pNode) CALLOC(1, nodesize); (me->rightson)->complexity = tempcp - rp.alpha; right_split = partition(1 + 2 * nodenum, me->rightson, &right_risk, n1 + nleft, n1 + nleft + nright); /* * Now calculate my actual C.P., which depends on children nodes, and * on grandchildren who do not collapse before the children. * The calculation is done assuming that I am the top node of the * whole tree, an assumption to be fixed up later. */ tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); /* Who goes first -- minimum of tempcp, leftson, and rightson */ if ((me->rightson)->complexity > (me->leftson)->complexity) { if (tempcp > (me->leftson)->complexity) { /* leftson collapses first */ left_risk = (me->leftson)->risk; left_split = 0; tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (tempcp > (me->rightson)->complexity) { /* right one goes too */ right_risk = (me->rightson)->risk; right_split = 0; } } } else if (tempcp > (me->rightson)->complexity) { /* right hand child goes first */ right_split = 0; right_risk = (me->rightson)->risk; tempcp = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (tempcp > (me->leftson)->complexity) { /* left one goes too */ left_risk = (me->leftson)->risk; left_split = 0; } } me->complexity = (me->risk - (left_risk + right_risk)) / (left_split + right_split + 1); if (me->complexity <= rp.alpha) { /* * All was in vain! This node doesn't split after all. */ free_tree(me, 0); *sumrisk = me->risk; for (i = n1; i < n2; i++) { j = rp.sorts[0][i]; if (j < 0) j = -(1 + j); rp.which[j] = nodenum; /* revert to the old nodenumber */ } return 0; /* return # of splits */ } else { *sumrisk = left_risk + right_risk; return left_split + right_split + 1; } }