/**
 * Pick the dimension in which the bound is widest and partition the points
 * about the midpoint of the bound in that dimension.
 *
 * NOTE(review): despite the class name, this overload splits at
 * bound[splitDimension].Mid() rather than at the mean of the points; confirm
 * that this is intended before relying on it.
 *
 * @param bound Bound consulted for the per-dimension width and midpoint.
 * @param data Dataset; it is reordered by PerformSplit().
 * @param begin Forwarded to PerformSplit() (presumably the index of the first
 *     point of the node; confirm against PerformSplit()).
 * @param count Forwarded to PerformSplit() (presumably the number of points
 *     in the node; confirm against PerformSplit()).
 * @param splitDimension Output: the widest dimension found.  It is left at
 *     data.n_rows (an invalid index) when no dimension could be selected.
 * @param splitCol Output: the value returned by PerformSplit().  It is only
 *     written when this function returns true.
 * @param oldFromNew Forwarded to PerformSplit().
 * @return false if no split is possible (no usable dimension, or the widest
 *     dimension has zero width); true otherwise.
 */
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitDimension,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the split dimension: the one in which the bound is widest.
  for (size_t d = 0; d < data.n_rows; d++)
  {
    const double width = bound[d].Width();

    if (width > maxWidth)
    {
      maxWidth = width;
      splitDimension = d;
    }
  }

  // No dimension was selected at all (there are no dimensions, or no width
  // compared greater than the sentinel, e.g. because every width is NaN).
  // Indexing the bound with the invalid sentinel would be out of range, so
  // refuse to split.
  if (splitDimension >= data.n_rows)
    return false;

  if (maxWidth == 0) // All these points are the same.  We can't split.
    return false;

  // Split in the middle of that dimension.
  const double splitVal = bound[splitDimension].Mid();

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension less than or equal to splitVal are
  // on the left of splitCol, and points with value in dimension splitDimension
  // greater than splitVal are on the right side of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}
/**
 * Pick the dimension in which the points are most spread out and partition
 * the points in columns [begin, begin + count) of data about their mean value
 * in that dimension.
 *
 * When BoundTraits<BoundType>::HasTightBounds is true, the widths reported by
 * the bound are used directly; otherwise the per-dimension extent of the
 * points is computed here.
 *
 * @param bound Bound of the node; consulted for widths when it is tight, and
 *     used to sanity-check the computed split value.
 * @param data Dataset (one point per column); it is reordered by
 *     PerformSplit().
 * @param begin Index of the first column belonging to the node.
 * @param count Number of columns belonging to the node.
 * @param splitCol Output: the value returned by PerformSplit().  It is only
 *     written when this function returns true.
 * @param oldFromNew Forwarded to PerformSplit().
 * @return false if no split is possible (no points, no usable dimension, or
 *     the widest dimension has zero width); true otherwise.
 */
bool MeanSplit<BoundType, MatType>::SplitNode(const BoundType& bound,
                                              MatType& data,
                                              const size_t begin,
                                              const size_t count,
                                              size_t& splitCol,
                                              std::vector<size_t>& oldFromNew)
{
  // With no points there is nothing to split, and the mean computed below
  // would be 0 / 0.
  if (count == 0)
    return false;

  size_t splitDimension = data.n_rows; // Indicate invalid.
  double maxWidth = -1;

  // Find the split dimension.  If the bound is tight, we only need to consult
  // the bound's width.
  if (bound::BoundTraits<BoundType>::HasTightBounds)
  {
    for (size_t d = 0; d < data.n_rows; d++)
    {
      const double width = bound[d].Width();

      if (width > maxWidth)
      {
        maxWidth = width;
        splitDimension = d;
      }
    }
  }
  else
  {
    // We must individually calculate bounding boxes.  The vector owns the
    // scratch ranges, so they are released on every exit path (including an
    // exception thrown while reading the data).
    // NOTE(review): relies on a default-constructed math::Range being empty,
    // exactly as the previous new[]-allocated array did.
    std::vector<math::Range> ranges(data.n_rows);
    for (size_t i = begin; i < begin + count; ++i)
    {
      // Expand each dimension as necessary.
      for (size_t d = 0; d < data.n_rows; ++d)
      {
        const double val = data(d, i);
        if (val < ranges[d].Lo())
          ranges[d].Lo() = val;
        if (val > ranges[d].Hi())
          ranges[d].Hi() = val;
      }
    }

    // Now, which is the widest?
    for (size_t d = 0; d < data.n_rows; d++)
    {
      const double width = ranges[d].Width();
      if (width > maxWidth)
      {
        maxWidth = width;
        splitDimension = d;
      }
    }
  }

  // No dimension was selected at all (there are no dimensions, or no width
  // compared greater than the sentinel, e.g. because every width is NaN).
  // Indexing with the invalid sentinel would be out of range, so refuse to
  // split.
  if (splitDimension >= data.n_rows)
    return false;

  if (maxWidth == 0) // All these points are the same.  We can't split.
    return false;

  // Split in the mean of that dimension.
  double splitVal = 0.0;
  for (size_t i = begin; i < begin + count; ++i)
    splitVal += data(splitDimension, i);
  splitVal /= count;

  Log::Assert(splitVal >= bound[splitDimension].Lo());
  Log::Assert(splitVal <= bound[splitDimension].Hi());

  // Perform the actual splitting.  This will order the dataset such that points
  // with value in dimension splitDimension less than or equal to splitVal are
  // on the left of splitCol, and points with value in dimension splitDimension
  // greater than splitVal are on the right side of splitCol.
  splitCol = PerformSplit(data, begin, count, splitDimension, splitVal,
      oldFromNew);

  return true;
}