BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    dataset(data)
{
  // Builds a node over `count` points starting at column `begin` of `data`,
  // and reports the point reordering in both directions:
  //   - oldFromNew: must already hold one entry per column of `data`; it is
  //     handed to SplitNode() together with the data.
  //   - newFromOld: overwritten here with the inverse of oldFromNew.
  //
  // We cannot verify that oldFromNew was initialized correctly, but we can at
  // least check its size.  Without this, a too-short vector would make the
  // inverse-mapping loop below read past its end.
  assert(oldFromNew.size() == data.n_cols);

  // Perform the actual splitting.
  SplitNode(data, oldFromNew);

  // Build the statistic only once the node has been split.
  stat = StatisticType(*this);

  // Invert the mapping: if new index i came from old index oldFromNew[i],
  // then old index oldFromNew[i] now lives at new index i.
  newFromOld.resize(data.n_cols);
  for (size_t i = 0; i < data.n_cols; i++)
    newFromOld[oldFromNew[i]] = i;
}
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    MatType&& data,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(NULL),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, so its parent distance is 0.
    dataset(new MatType(std::move(data))),
    localDataset(true)
{
  // Root constructor: the matrix is moved into a freshly allocated MatType and
  // localDataset is set to true.

  // Index list handed to SplitNode().  It stays empty for an empty dataset;
  // otherwise it enumerates every column: 0 .. (n_cols - 1).
  arma::Col<size_t> allPoints;
  if (dataset->n_cols > 0)
  {
    allPoints = arma::linspace<arma::Col<size_t>>(
        0, dataset->n_cols - 1, dataset->n_cols);
  }

  // Split this node.
  SplitNode(allPoints, maxLeafSize, tau, rho);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);
}
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    std::vector<size_t>& oldFromNew,
    std::vector<size_t>& newFromOld,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0),
    count(data.n_cols),
    maxLeafSize(maxLeafSize),
    bound(data.n_rows),
    parentDistance(0), // The root has no parent, so its parent distance is 0.
    dataset(data)
{
  // Root constructor that also reports the point reordering in both
  // directions through oldFromNew and newFromOld (both are overwritten).

  // Start from the identity mapping: before splitting, every point is still
  // at its original index.
  oldFromNew.resize(data.n_cols);
  for (size_t col = 0; col < data.n_cols; ++col)
  {
    oldFromNew[col] = col;
  }

  // Split the node; oldFromNew is passed along with the data.
  SplitNode(data, oldFromNew);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);

  // Build the inverse mapping from old indices to new indices.
  newFromOld.resize(data.n_cols);
  for (size_t newIndex = 0; newIndex < data.n_cols; ++newIndex)
  {
    newFromOld[oldFromNew[newIndex]] = newIndex;
  }
}
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    const MatType& dataset,
    const ElemType base,
    const size_t pointIndex,
    const int scale,
    CoverTree* parent,
    const ElemType parentDistance,
    const ElemType furthestDescendantDistance,
    MetricType* metric) :
    dataset(&dataset),
    point(pointIndex),
    scale(scale),
    base(base),
    numDescendants(0),
    parent(parent),
    parentDistance(parentDistance),
    furthestDescendantDistance(furthestDescendantDistance),
    localMetric(metric == NULL),
    localDataset(false),
    metric(metric),
    distanceComps(0)
{
  // Builds a single node from explicitly supplied parameters; no children are
  // created here.

  // Inside the body, `metric` names the constructor argument.  A null argument
  // means no metric was supplied (localMetric was set to true above), so a
  // default-constructed one is allocated and stored in the member.
  if (metric == NULL)
  {
    this->metric = new MetricType();
  }

  // Build the statistic from the finished node.
  stat = StatisticType(*this);
}
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    std::vector<size_t>& oldFromNew,
    SplitType& splitter,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Builds a node over `count` points starting at column `begin`, using the
  // caller-supplied splitter and recording the reordering in oldFromNew.

  // The contents of oldFromNew cannot be validated here, but its length must
  // at least match the number of columns in the data.
  assert(oldFromNew.size() == data.n_cols);

  // Split the node with the given splitter.
  SplitNode(data, oldFromNew, maxLeafSize, splitter);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);
}
inline void
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::
RemoveNewImplicitNodes()
{
  // Collapses implicit nodes at the tail of `children`: while the last child
  // has exactly one child of its own, that grandchild replaces it.  This may
  // repeat several times, since the promoted node can itself have only a
  // single child.
  //
  // The emptiness check guards the access to the last element; indexing with
  // `children.size() - 1` on an empty vector would wrap around.
  while (!children.empty() && children.back()->NumChildren() == 1)
  {
    // Detach the implicit node from our child list.
    CoverTree* implicitNode = children.back();
    children.pop_back();

    // Its only child takes its place.
    CoverTree& promoted = implicitNode->Child(0);
    children.push_back(&promoted);

    // Re-parent the promoted node, carry over the removed node's parent
    // distance and distance-computation count, then rebuild its statistic.
    promoted.Parent() = this;
    promoted.ParentDistance() = implicitNode->ParentDistance();
    promoted.DistanceComps() = implicitNode->DistanceComps();
    promoted.Stat() = StatisticType(promoted);

    // Drop the promoted node from the implicit node's child list before
    // deleting it, so that the deletion does not also delete the promoted
    // node.
    implicitNode->Children().pop_back();

    delete implicitNode;
  }
}
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    const MatType& dataset,
    const ElemType base,
    const size_t pointIndex,
    const int scale,
    CoverTree* parent,
    const ElemType parentDistance,
    arma::Col<size_t>& indices,
    arma::vec& distances,
    size_t nearSetSize,
    size_t& farSetSize,
    size_t& usedSetSize,
    MetricType& metric) :
    dataset(&dataset),
    point(pointIndex),
    scale(scale),
    base(base),
    numDescendants(0),
    parent(parent),
    parentDistance(parentDistance),
    furthestDescendantDistance(0),
    localMetric(false),
    localDataset(false),
    metric(&metric),
    distanceComps(0)
{
  // Builds a node from the given near/far/used sets, creating children unless
  // the near set is empty.
  if (nearSetSize == 0)
  {
    // An empty near set makes this node a leaf: it gets the minimum scale and
    // counts exactly one descendant.
    this->scale = INT_MIN;
    numDescendants = 1;
  }
  else
  {
    // Otherwise, create the children from the supplied sets.
    CreateChildren(indices, distances, nearSetSize, farSetSize, usedSetSize);
  }

  // In both cases the statistic is built last, from the finished node.
  stat = StatisticType(*this);
}
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(NULL),
    begin(0),           // The root starts at the first column...
    count(data.n_cols), // ...and covers every column of the dataset.
    bound(data.n_rows),
    parentDistance(0),  // The root has no parent, so its parent distance is 0.
    dataset(data)
{
  // Root constructor that does not report the point reordering.

  // Split this node.
  SplitNode(data, maxLeafSize);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);
}
BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::BinarySpaceTree(
    MatType& data,
    const size_t begin,
    const size_t count,
    BinarySpaceTree* parent,
    const size_t maxLeafSize) :
    left(NULL),
    right(NULL),
    parent(parent),
    begin(begin),
    count(count),
    bound(data.n_rows),
    dataset(data)
{
  // Builds a node over `count` points starting at column `begin`, attached to
  // `parent`, without reporting the point reordering.

  // Split this node.
  SplitNode(data, maxLeafSize);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);
}
SpillTree<MetricType, StatisticType, MatType, HyperplaneType, SplitType>::
SpillTree(
    SpillTree* parent,
    arma::Col<size_t>& points,
    const double tau,
    const size_t maxLeafSize,
    const double rho) :
    left(NULL),
    right(NULL),
    parent(parent),
    count(0),
    pointsIndex(NULL),
    overlappingNode(false),
    hyperplane(),
    bound(parent->Dataset().n_rows),
    dataset(&parent->Dataset()), // Share the parent's dataset.
    localDataset(false)
{
  // Child constructor: `parent` is dereferenced in the initializer list above,
  // so it must not be null.  The node points at the parent's dataset and
  // localDataset is set to false.

  // Split this node over the given point indices.
  SplitNode(points, maxLeafSize, tau, rho);

  // The statistic is built from the node after splitting.
  stat = StatisticType(*this);
}
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    MatType&& data,
    MetricType& metric,
    const ElemType base) :
    dataset(new MatType(std::move(data))),
    // `dataset` is the member pointer here, so it is dereferenced to hand the
    // root policy the matrix itself rather than a pointer to it.
    point(RootPointPolicy::ChooseRoot(*dataset)),
    scale(INT_MAX),
    base(base),
    numDescendants(0),
    parent(NULL),
    parentDistance(0),
    furthestDescendantDistance(0),
    localMetric(false),
    localDataset(true),
    metric(&metric),
    distanceComps(0)
{
  // Root constructor: the matrix is moved into a freshly allocated MatType
  // (localDataset is true) and the caller's metric is referenced, not copied
  // (localMetric is false).

  // With zero or one point there is nothing to build.  Technically, if the
  // dataset has zero points, this node is not correct.
  if (dataset->n_cols <= 1)
  {
    scale = INT_MIN;
    return;
  }

  // Kick off the building.  Create the indices array and the distances array.
  arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
      dataset->n_cols - 1, dataset->n_cols - 1);
  // This is now [1 2 3 ... n_cols - 1].  The root point itself must not occur
  // in the set.
  if (point != 0)
    indices[point - 1] = 0; // Put 0 back into the set; remove what was there.

  arma::vec distances(dataset->n_cols - 1);

  // Build the initial distances.
  ComputeDistances(point, indices, distances, dataset->n_cols - 1);

  // Create the children.
  size_t farSetSize = 0;
  size_t usedSetSize = 0;
  CreateChildren(indices, distances, dataset->n_cols - 1, farSetSize,
      usedSetSize);

  // If we ended up creating only one child, remove the implicit node.
  while (children.size() == 1)
  {
    // Prepare to delete the implicit child node.
    CoverTree* old = children[0];

    // Now take its children and set their parent correctly.
    children.erase(children.begin());
    for (size_t i = 0; i < old->NumChildren(); ++i)
    {
      children.push_back(&(old->Child(i)));

      // Set its parent correctly, and rebuild the statistic.
      old->Child(i).Parent() = this;
      old->Child(i).Stat() = StatisticType(old->Child(i));
    }

    // Detach the adopted children from the old node so that deleting it does
    // not delete them too.
    old->Children().clear();

    // Reduce our own scale.
    scale = old->Scale();

    // Now delete it.
    delete old;
  }

  // Use the furthest descendant distance to determine the scale of the root
  // node.
  if (furthestDescendantDistance == 0.0)
    scale = INT_MIN;
  else
    scale = static_cast<int>(ceil(log(furthestDescendantDistance) / log(base)));

  // Initialize statistic.
  stat = StatisticType(*this);

  Log::Info << distanceComps << " distance computations during tree "
      << "construction." << std::endl;
}