/*
	Build a Polygon from a string of numbers that are read as consecutive x,y pairs.

	The string is converted by NUMstring_to_numbers into a 1-based vector of values;
	value 2k-1 becomes x[k] and value 2k becomes y[k].

	Throws (as "Polygon not created.") when fewer than 6 values are supplied
	(i.e. fewer than 3 vertices) or when the number of values is odd.
	A warning (not an error) is issued for every vertex that coincides exactly
	with its predecessor.

	The caller receives ownership of the returned Polygon.
*/
Polygon Polygon_createSimple (wchar_t *xystring) {
	try {
		long numberOfValues;
		autoNUMvector<double> values (NUMstring_to_numbers (xystring, &numberOfValues), 1);

		if (numberOfValues < 6) {
			Melder_throw ("There must be at least 3 points (= x,y pairs) in the Polygon");
		}
		if (numberOfValues % 2 != 0) {
			Melder_throw ("One value is missing.");
		}

		const long numberOfVertices = numberOfValues / 2;   // two values per vertex
		autoPolygon me = Polygon_create (numberOfVertices);

		// ivalue always indexes the x value belonging to vertex ivertex (ivalue == 2 * ivertex - 1).
		for (long ivertex = 1, ivalue = 1; ivertex <= numberOfVertices; ivertex ++, ivalue += 2) {
			my x [ivertex] = values [ivalue];
			my y [ivertex] = values [ivalue + 1];

			// The first vertex has no predecessor; the short-circuit keeps index 0 from being read.
			const bool repeatsPrevious = ivertex > 1 &&
				my x [ivertex] == my x [ivertex - 1] &&
				my y [ivertex] == my y [ivertex - 1];
			if (repeatsPrevious) {
				Melder_warning ("Two successives vertices are equal.");
			}
		}
		return me.transfer();
	} catch (MelderError) {
		Melder_throw ("Polygon not created.");
	}
}
/*************************************************************************
Builds a decision forest from a training set and fills in the training-set
and out-of-bag (OOB) error report.

INPUT PARAMETERS:
    XY          -   training set, rows 0..NPoints-1. Columns 0..NVars-1 hold
                    the independent variables; column NVars holds the class
                    label (rounded to an integer when NClasses>1) or the
                    regression target (when NClasses=1).
    NPoints     -   number of rows in XY, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=1 (1 means regression)
    NTrees      -   number of trees to build, NTrees>=1
    SampleSize  -   number of rows drawn (without replacement) for each
                    tree, 1<=SampleSize<=NPoints. The rows that were not
                    drawn are used for that tree's OOB estimate.
    NFeatures   -   passed through to DFBuildTree, NFeatures>=1
    Flags       -   bit flags; only the DFUseEVS bit is inspected here, the
                    full value is also passed through to DFBuildTree.

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if NClasses>1 and some class label in XY lies
                          outside [0,NClasses-1]
                    * -1, if one of the size parameters is out of range
                    *  1, if the forest was built
                    When Info<0, DF and Rep are left untouched.
    DF          -   the forest: NVars, NClasses, NTrees, BufSize and Trees
                    are written.
    Rep         -   training-set errors (via the DF*Error helpers) and OOB
                    errors. OOB fields stay zero when no row was ever
                    out-of-bag (e.g. SampleSize=NPoints).
*************************************************************************/
void dfbuildinternal(const ap::real_2d_array& xy,
     int npoints,
     int nvars,
     int nclasses,
     int ntrees,
     int samplesize,
     int nfeatures,
     int flags,
     int& info,
     decisionforest& df,
     dfreport& rep)
{
    int i;
    int j;
    int k;
    int tmpi;
    int lasttreeoffs;
    int offs;
    int ooboffs;
    int treesize;
    int nvarsinpool;
    int poolpos;
    bool useevs;
    dfinternalbuffers bufs;
    ap::integer_1d_array permbuf;
    ap::real_1d_array oobbuf;
    ap::integer_1d_array oobcntbuf;
    ap::real_2d_array xys;
    ap::real_1d_array x;
    ap::real_1d_array y;
    int oobcnt;
    int oobrelcnt;
    double v;
    double vmin;
    double vmax;
    bool bflag;

    //
    // Test for inputs
    //
    if( npoints<1||samplesize<1||samplesize>npoints||nvars<1||nclasses<1||ntrees<1||nfeatures<1 )
    {
        info = -1;
        return;
    }
    if( nclasses>1 )
    {
        for(i = 0; i <= npoints-1; i++)
        {
            if( ap::round(xy(i,nvars))<0||ap::round(xy(i,nvars))>=nclasses )
            {
                info = -2;
                return;
            }
        }
    }
    info = 1;

    //
    // Flags
    //
    useevs = flags/dfuseevs%2!=0;

    //
    // Allocate data, prepare header
    //
    // TreeSize is the capacity reserved for one tree: one header element,
    // InnerNodeWidth elements for each of SampleSize-1 inner nodes and
    // LeafNodeWidth elements for each of SampleSize leaves (presumably the
    // largest tree DFBuildTree can produce from SampleSize rows).
    //
    treesize = 1+innernodewidth*(samplesize-1)+leafnodewidth*samplesize;
    permbuf.setbounds(0, npoints-1);
    bufs.treebuf.setbounds(0, treesize-1);
    bufs.idxbuf.setbounds(0, npoints-1);
    bufs.tmpbufr.setbounds(0, npoints-1);
    bufs.tmpbufr2.setbounds(0, npoints-1);
    bufs.tmpbufi.setbounds(0, npoints-1);
    bufs.varpool.setbounds(0, nvars-1);
    bufs.evsbin.setbounds(0, nvars-1);
    bufs.evssplits.setbounds(0, nvars-1);
    bufs.classibuf.setbounds(0, 2*nclasses-1);
    oobbuf.setbounds(0, nclasses*npoints-1);
    oobcntbuf.setbounds(0, npoints-1);
    df.trees.setbounds(0, ntrees*treesize-1);
    xys.setbounds(0, samplesize-1, 0, nvars);
    x.setbounds(0, nvars-1);
    y.setbounds(0, nclasses-1);
    for(i = 0; i <= npoints-1; i++)
    {
        permbuf(i) = i;
    }
    for(i = 0; i <= npoints*nclasses-1; i++)
    {
        oobbuf(i) = 0;
    }
    for(i = 0; i <= npoints-1; i++)
    {
        oobcntbuf(i) = 0;
    }

    //
    // Prepare variable pool and EVS (extended variable selection/splitting) buffers
    // (whether EVS is turned on or not):
    // 1. detect binary variables and pre-calculate splits for them
    // 2. detect variables with non-distinct values and exclude them from pool
    //
    // The pool occupies VarPool[0..NVarsInPool-1]; slots past the pool are
    // set to -1. EVSBin/EVSSplits are indexed by variable number, not by
    // pool slot.
    //
    for(i = 0; i <= nvars-1; i++)
    {
        bufs.varpool(i) = i;
    }
    nvarsinpool = nvars;
    if( useevs )
    {
        for(j = 0; j <= nvars-1; j++)
        {
            vmin = xy(0,j);
            vmax = vmin;
            for(i = 0; i <= npoints-1; i++)
            {
                v = xy(i,j);
                vmin = ap::minreal(vmin, v);
                vmax = ap::maxreal(vmax, v);
            }
            if( ap::fp_eq(vmin,vmax) )
            {
                //
                // exclude variable from pool
                //
                // An earlier exclusion may have moved variable J from
                // slot J to a lower slot (removal swaps the last pool
                // entry into the freed slot), so its current slot has to
                // be looked up. Overwriting slot J blindly would leave the
                // constant variable in the pool and throw away a useful one.
                //
                poolpos = -1;
                for(k = 0; k <= nvarsinpool-1; k++)
                {
                    if( bufs.varpool(k)==j )
                    {
                        poolpos = k;
                        break;
                    }
                }
                if( poolpos>=0 )
                {
                    bufs.varpool(poolpos) = bufs.varpool(nvarsinpool-1);
                    bufs.varpool(nvarsinpool-1) = -1;
                    nvarsinpool = nvarsinpool-1;
                }
                continue;
            }
            bflag = false;
            for(i = 0; i <= npoints-1; i++)
            {
                v = xy(i,j);
                if( ap::fp_neq(v,vmin)&&ap::fp_neq(v,vmax) )
                {
                    bflag = true;
                    break;
                }
            }
            if( bflag )
            {
                //
                // non-binary variable
                //
                bufs.evsbin(j) = false;
            }
            else
            {
                //
                // Binary variable: pre-calculate its only possible split.
                // If the midpoint rounds down to VMin, fall back to VMax so
                // that the split value is strictly greater than VMin.
                //
                bufs.evsbin(j) = true;
                bufs.evssplits(j) = 0.5*(vmin+vmax);
                if( ap::fp_less_eq(bufs.evssplits(j),vmin) )
                {
                    bufs.evssplits(j) = vmax;
                }
            }
        }
    }

    //
    // RANDOM FOREST FORMAT
    // W[0]         -   size of array
    // W[1]         -   version number
    // W[2]         -   NVars
    // W[3]         -   NClasses (1 for regression)
    // W[4]         -   NTrees
    // W[5]         -   trees offset
    //
    //
    // TREE FORMAT
    // W[Offs]      -   size of sub-array
    //     node info:
    // W[K+0]       -   variable number        (-1 for leaf mode)
    // W[K+1]       -   threshold              (class/value for leaf node)
    // W[K+2]       -   ">=" branch index      (absent for leaf node)
    //
    //
    df.nvars = nvars;
    df.nclasses = nclasses;
    df.ntrees = ntrees;

    //
    // Build forest
    //
    offs = 0;
    for(i = 0; i <= ntrees-1; i++)
    {
        //
        // Prepare sample: a partial shuffle of PermBuf moves SampleSize
        // distinct row indices into PermBuf[0..SampleSize-1]; each chosen
        // row (variables and target column) is copied into XYS.
        //
        for(k = 0; k <= samplesize-1; k++)
        {
            j = k+ap::randominteger(npoints-k);
            tmpi = permbuf(k);
            permbuf(k) = permbuf(j);
            permbuf(j) = tmpi;
            j = permbuf(k);
            ap::vmove(&xys(k, 0), 1, &xy(j, 0), 1, ap::vlen(0,nvars));
        }

        //
        // build tree, copy
        //
        // The tree is read back from TreeBuf; element 0 holds its length,
        // which determines how much is appended to DF.Trees.
        //
        dfbuildtree(xys, samplesize, nvars, nclasses, nfeatures, nvarsinpool, flags, bufs);
        j = ap::round(bufs.treebuf(0));
        ap::vmove(&df.trees(offs), 1, &bufs.treebuf(0), 1, ap::vlen(offs,offs+j-1));
        lasttreeoffs = offs;
        offs = offs+j;

        //
        // OOB estimates: rows left in PermBuf[SampleSize..NPoints-1] were
        // not used for this tree. The tree's output for each of them is
        // accumulated in OOBBuf and the contribution is counted.
        //
        for(k = samplesize; k <= npoints-1; k++)
        {
            for(j = 0; j <= nclasses-1; j++)
            {
                y(j) = 0;
            }
            j = permbuf(k);
            ap::vmove(&x(0), 1, &xy(j, 0), 1, ap::vlen(0,nvars-1));
            dfprocessinternal(df, lasttreeoffs, x, y);
            ap::vadd(&oobbuf(j*nclasses), 1, &y(0), 1, ap::vlen(j*nclasses,(j+1)*nclasses-1));
            oobcntbuf(j) = oobcntbuf(j)+1;
        }
    }
    df.bufsize = offs;

    //
    // Normalize OOB results: average over the trees that contributed
    //
    for(i = 0; i <= npoints-1; i++)
    {
        if( oobcntbuf(i)!=0 )
        {
            v = double(1)/double(oobcntbuf(i));
            ap::vmul(&oobbuf(i*nclasses), 1, ap::vlen(i*nclasses,i*nclasses+nclasses-1), v);
        }
    }

    //
    // Calculate training set estimates
    //
    rep.relclserror = dfrelclserror(df, xy, npoints);
    rep.avgce = dfavgce(df, xy, npoints);
    rep.rmserror = dfrmserror(df, xy, npoints);
    rep.avgerror = dfavgerror(df, xy, npoints);
    rep.avgrelerror = dfavgrelerror(df, xy, npoints);

    //
    // Calculate OOB estimates.
    // Only rows that were out-of-bag for at least one tree take part.
    //
    rep.oobrelclserror = 0;
    rep.oobavgce = 0;
    rep.oobrmserror = 0;
    rep.oobavgerror = 0;
    rep.oobavgrelerror = 0;
    oobcnt = 0;
    oobrelcnt = 0;
    for(i = 0; i <= npoints-1; i++)
    {
        if( oobcntbuf(i)!=0 )
        {
            ooboffs = i*nclasses;
            if( nclasses>1 )
            {
                //
                // classification-specific code
                //
                // K is the true class, TmpI the class with the largest
                // averaged OOB output (first one wins on ties).
                //
                k = ap::round(xy(i,nvars));
                tmpi = 0;
                for(j = 1; j <= nclasses-1; j++)
                {
                    if( ap::fp_greater(oobbuf(ooboffs+j),oobbuf(ooboffs+tmpi)) )
                    {
                        tmpi = j;
                    }
                }
                if( tmpi!=k )
                {
                    rep.oobrelclserror = rep.oobrelclserror+1;
                }
                if( ap::fp_neq(oobbuf(ooboffs+k),0) )
                {
                    rep.oobavgce = rep.oobavgce-log(oobbuf(ooboffs+k));
                }
                else
                {
                    // log(0) is avoided by substituting the smallest real number
                    rep.oobavgce = rep.oobavgce-log(ap::minrealnumber);
                }
                for(j = 0; j <= nclasses-1; j++)
                {
                    if( j==k )
                    {
                        rep.oobrmserror = rep.oobrmserror+ap::sqr(oobbuf(ooboffs+j)-1);
                        rep.oobavgerror = rep.oobavgerror+fabs(oobbuf(ooboffs+j)-1);
                        rep.oobavgrelerror = rep.oobavgrelerror+fabs(oobbuf(ooboffs+j)-1);
                        oobrelcnt = oobrelcnt+1;
                    }
                    else
                    {
                        rep.oobrmserror = rep.oobrmserror+ap::sqr(oobbuf(ooboffs+j));
                        rep.oobavgerror = rep.oobavgerror+fabs(oobbuf(ooboffs+j));
                    }
                }
            }
            else
            {
                //
                // regression-specific code
                //
                rep.oobrmserror = rep.oobrmserror+ap::sqr(oobbuf(ooboffs)-xy(i,nvars));
                rep.oobavgerror = rep.oobavgerror+fabs(oobbuf(ooboffs)-xy(i,nvars));
                if( ap::fp_neq(xy(i,nvars),0) )
                {
                    // relative error is undefined for a zero target, so such rows are skipped
                    rep.oobavgrelerror = rep.oobavgrelerror+fabs((oobbuf(ooboffs)-xy(i,nvars))/xy(i,nvars));
                    oobrelcnt = oobrelcnt+1;
                }
            }

            //
            // update OOB estimates count.
            //
            oobcnt = oobcnt+1;
        }
    }
    if( oobcnt>0 )
    {
        rep.oobrelclserror = rep.oobrelclserror/oobcnt;
        rep.oobavgce = rep.oobavgce/oobcnt;
        rep.oobrmserror = sqrt(rep.oobrmserror/(oobcnt*nclasses));
        rep.oobavgerror = rep.oobavgerror/(oobcnt*nclasses);
        if( oobrelcnt>0 )
        {
            rep.oobavgrelerror = rep.oobavgrelerror/oobrelcnt;
        }
    }
}