Ejemplo n.º 1
0
/**
 * Pick the dimension in which the given bound is widest and split the points
 * in [begin, begin + count) at the midpoint of the bound in that dimension.
 *
 * @param bound Bound whose per-dimension Width() and Mid() are consulted.
 * @param data Dataset; reordered by PerformSplit() when a split is made.
 * @param begin Index of the first point to consider.
 * @param count Number of points to consider.
 * @param splitDimension Set to the widest dimension found, or left at
 *     data.n_rows (an invalid dimension) if no dimension could be selected.
 * @param splitCol On success, set to the value returned by PerformSplit().
 * @param oldFromNew Forwarded to PerformSplit().
 * @return true if a split was performed; false if no dimension has positive
 *     width, in which case data and splitCol are left untouched.
 */
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the split dimension: the one in which the bound is widest.
  for (size_t d = 0; d < data.n_rows; ++d)
  {
    const double width = bound[d].Width();

    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // A width of zero means all these points are the same, so we can't split.
  // A negative maxWidth means no dimension was selected at all (for example,
  // when data.n_rows is 0), so splitDimension is still invalid and must not be
  // used to index the bound.
  if (maxWidth <= 0)
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension less than or equal to splitVal are
  // on the left of splitCol, and points with value in dimension splitDimension
  // greater than splitVal are on the right side of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}
Ejemplo n.º 2
0
/**
 * Pick the widest dimension of the points in [begin, begin + count) and split
 * them at the mean of their values in that dimension.
 *
 * If BoundTraits<BoundType>::HasTightBounds is set, the widest dimension is
 * read directly from the bound; otherwise the per-dimension ranges of the
 * points are computed here.
 *
 * @param bound Bound of the points; consulted for widths when it is tight and
 *     for the sanity checks on the split value.
 * @param data Dataset; reordered by PerformSplit() when a split is made.
 * @param begin Index of the first point (column of data) to consider.
 * @param count Number of points to consider.
 * @param splitCol On success, set to the value returned by PerformSplit().
 * @param oldFromNew Forwarded to PerformSplit().
 * @return true if a split was performed; false if there are no points or no
 *     dimension has positive width, in which case data and splitCol are left
 *     untouched.
 */
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  size_t splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the split dimension.  If the bound is tight, we only need to consult
  // the bound's width.
  if (bound::BoundTraits<BoundType>::HasTightBounds)
  {
    for (size_t d = 0; d < data.n_rows; ++d)
    {
      const double width = bound[d].Width();

      if (width > maxWidth)
      {
        maxWidth = width;
        splitDimension = d;
      }
    }
  }
  else
  {
    // We must individually calculate bounding boxes.  A vector owns the
    // scratch ranges so they are released on every exit path.
    std::vector<math::Range> ranges(data.n_rows);
    for (size_t i = begin; i < begin + count; ++i)
    {
      // Expand each dimension as necessary.
      for (size_t d = 0; d < data.n_rows; ++d)
      {
        const double val = data(d, i);
        if (val < ranges[d].Lo())
          ranges[d].Lo() = val;
        if (val > ranges[d].Hi())
          ranges[d].Hi() = val;
      }
    }

    // Now, which is the widest?
    for (size_t d = 0; d < data.n_rows; ++d)
    {
      const double width = ranges[d].Width();

      if (width > maxWidth)
      {
        maxWidth = width;
        splitDimension = d;
      }
    }
  }

  // A width of zero means all these points are the same, so we can't split.
  // A negative maxWidth means no dimension was selected at all (for example,
  // when data.n_rows is 0), so splitDimension is still invalid and must not be
  // used to index the data or the bound.
  if (maxWidth <= 0)
    return false;

  // With no points there is no mean to compute (it would be 0 / 0).
  if (count == 0)
    return false;

  // Split in the mean of that dimension.
  double splitVal = 0.0;
  for (size_t i = begin; i < begin + count; ++i)
    splitVal += data(splitDimension, i);
  splitVal /= count;

  // The mean of the points must lie inside the bound that contains them.
  Log::Assert(splitVal >= bound[splitDimension].Lo());
  Log::Assert(splitVal <= bound[splitDimension].Hi());

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension less than or equal to splitVal are
  // on the left of splitCol, and points with value in dimension splitDimension
  // greater than splitVal are on the right side of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}