示例#1
0
size_t SplitData(
    std::vector<CellData<D,C>*>& vdata, SplitMethod sm, 
    size_t start, size_t end, const Position<C>& meanpos)
{
    Assert(end-start > 1);
    size_t mid=0;

    Bounds<C> b;
    for(size_t i=start;i<end;++i) b += vdata[i]->getPos();

    int split = b.getSplit();

    switch (sm) { // three different split methods
      case MIDDLE :
           { // Middle is the average of the min and max value of x or y
               double splitvalue = b.getMiddle(split);
               DataCompareToValue<D,C> comp(split,splitvalue);
               typename std::vector<CellData<D,C>*>::iterator middle =
                   std::partition(vdata.begin()+start,vdata.begin()+end,comp);
               mid = middle - vdata.begin();
           } break;
      case MEDIAN :
           { // Median is the point which divides the group into equal numbers
               DataCompare<D,C> comp(split);
               mid = (start+end)/2;
               typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid;
               std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp);
           } break;
      case MEAN :
           { // Mean is the weighted average value of x or y
               double splitvalue = meanpos.get(split);
               DataCompareToValue<D,C> comp(split,splitvalue);
               typename std::vector<CellData<D,C>*>::iterator middle =
                   std::partition(vdata.begin()+start,vdata.begin()+end,comp);
               mid = middle - vdata.begin();
           } break;
      case RANDOM :
           { // Random is a random point from the first quartile to the third quartile
               DataCompare<D,C> comp(split);

               // The code for RANDOM is same as MEDIAN except for the next line.
               // Note: The lo and hi values are slightly subtle.  We want to make sure if there
               // are only two values, we actually split.  So if start=1, end=3, the only possible
               // result should be mid=2.  Otherwise, we want roughly 1/4 and 3/4 of the span.
               mid = select_random(end-3*(end-start)/4,start+3*(end-start)/4);

               typename std::vector<CellData<D,C>*>::iterator middle = vdata.begin()+mid;
               std::nth_element(vdata.begin()+start,middle,vdata.begin()+end,comp);
           } break;
      default :
           myerror("Invalid SplitMethod");
    }

    if (mid == start || mid == end) {
        xdbg<<"Found mid not in middle.  Probably duplicate entries.\n";
        xdbg<<"start = "<<start<<std::endl;
        xdbg<<"end = "<<end<<std::endl;
        xdbg<<"mid = "<<mid<<std::endl;
        xdbg<<"sm = "<<sm<<std::endl;
        xdbg<<"b = "<<b<<std::endl;
        xdbg<<"split = "<<split<<std::endl;
        for(size_t i=start; i!=end; ++i) {
            xdbg<<"v["<<i<<"] = "<<vdata[i]<<std::endl;
        }
        // With duplicate entries, can get mid == start or mid == end. 
        // This should only happen if all entries in this set are equal.
        // So it should be safe to just take the mid = (start + end)/2.
        // But just to be safe, re-call this function with sm = MEDIAN to 
        // make sure.
        Assert(sm != MEDIAN);
        return SplitData(vdata,MEDIAN,start,end,meanpos);
    }
    Assert(mid > start);
    Assert(mid < end);
    return mid;
}