size_t SplitData( std::vector<CellData<D,C>*>& vdata, SplitMethod sm, size_t start, size_t end, const Position<C>& meanpos) { Assert(end-start > 1); size_t mid=0; Bounds<C> b; for(size_t i=start;i<end;++i) b += vdata[i]->getPos(); int split = b.getSplit(); switch (sm) { // three different split methods case MIDDLE : { // Middle is the average of the min and max value of x or y double splitvalue = b.getMiddle(split); DataCompareToValue<D,C> comp(split,splitvalue); typename std::vector<CellData<D,C>*>::iterator middle = std::partition(vdata.begin()+start,vdata.begin()+end,comp); mid = middle - vdata.begin(); } break; case MEDIAN : { // Median is the point which divides the group into equal numbers DataCompare<D,C> comp(split); mid = (start+end)/2; typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid; std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp); } break; case MEAN : { // Mean is the weighted average value of x or y double splitvalue = meanpos.get(split); DataCompareToValue<D,C> comp(split,splitvalue); typename std::vector<CellData<D,C>*>::iterator middle = std::partition(vdata.begin()+start,vdata.begin()+end,comp); mid = middle - vdata.begin(); } break; case RANDOM : { // Random is a random point from the first quartile to the third quartile DataCompare<D,C> comp(split); // The code for RANDOM is same as MEDIAN except for the next line. // Note: The lo and hi values are slightly subtle. We want to make sure if there // are only two values, we actually split. So if start=1, end=3, the only possible // result should be mid=2. Otherwise, we want roughly 1/4 and 3/4 of the span. mid = select_random(end-3*(end-start)/4,start+3*(end-start)/4); typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid; std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp); } break; default : myerror("Invalid SplitMethod"); } if (mid == start || mid == end) { xdbg<<"Found mid not in middle. Probably duplicate entries.\n"; xdbg<<"start = "<<start<<std::endl; xdbg<<"end = "<<end<<std::endl; xdbg<<"mid = "<<mid<<std::endl; xdbg<<"sm = "<<sm<<std::endl; xdbg<<"b = "<<b<<std::endl; xdbg<<"split = "<<split<<std::endl; for(size_t i=start; i!=end; ++i) { xdbg<<"v["<<i<<"] = "<<vdata[i]<<std::endl; } // With duplicate entries, can get mid == start or mid == end. // This should only happen if all entries in this set are equal. // So it should be safe to just take the mid = (start + end)/2. // But just to be safe, re-call this function with sm = MEDIAN to // make sure. Assert(sm != MEDIAN); return SplitData(vdata,MEDIAN,start,end,meanpos); } Assert(mid > start); Assert(mid < end); return mid; }