// Entry point of the dual-tree traversal for a pair of nodes.
//
// The pair (queryNode, referenceNode) is evaluated once with the rule set, the
// result is wrapped in a map entry filed under the reference node's scale, and
// the map-based Traverse() overload takes over from there.
//
// @param queryNode Node of the query tree to start from.
// @param referenceNode Node of the reference tree to start from.
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::Traverse(
    CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
    CoverTree<MetricType, RootPointPolicy, StatisticType>& referenceNode)
{
  // Evaluate the node pair.  The order matters: the traversal info is
  // snapshotted only after both Score() and BaseCase() have run.
  const double pairScore = rule.Score(queryNode, referenceNode);
  const double pairBaseCase = rule.BaseCase(queryNode.Point(),
                                            referenceNode.Point());

  DualCoverTreeMapEntry seedEntry;
  seedEntry.referenceNode = &referenceNode;
  seedEntry.score = pairScore;
  seedEntry.baseCase = pairBaseCase;
  seedEntry.traversalInfo = rule.TraversalInfo();

  // The scale-indexed map starts out holding just that one entry.
  std::map<int, std::vector<DualCoverTreeMapEntry> > scaleMap;
  scaleMap[referenceNode.Scale()].push_back(seedEntry);

  Traverse(queryNode, scaleMap);
}
// Build the root node of a cover tree from a dataset that is moved in.
//
// The dataset is moved into a newly allocated MatType and localDataset is set
// to true; the metric is only referenced through a pointer (localMetric is
// false).  The root point is picked by RootPointPolicy::ChooseRoot().
//
// @param data Dataset to build the tree on; its contents are moved from.
// @param metric Metric to use; stored by address, so it must outlive the tree.
// @param base Base used when converting distances to scales.
CoverTree<MetricType, StatisticType, MatType, RootPointPolicy>::CoverTree(
    MatType&& data,
    MetricType& metric,
    const ElemType base) :
    dataset(new MatType(std::move(data))),
    point(RootPointPolicy::ChooseRoot(dataset)),
    scale(INT_MAX),
    base(base),
    numDescendants(0),
    parent(NULL),
    parentDistance(0),
    furthestDescendantDistance(0),
    localMetric(false),
    localDataset(true),
    metric(&metric),
    distanceComps(0)
{
  // If there is only one point or zero points in the dataset... uh, we're done.
  // Technically, if the dataset has zero points, our node is not correct...
  if (dataset->n_cols <= 1)
  {
    // A node without children is a leaf, and leaves are identified by
    // scale == INT_MIN; leaving INT_MAX here would make a childless root look
    // like an internal node to the traversers.
    scale = INT_MIN;
    return;
  }

  // Kick off the building.  Create the indices array and the distances array.
  arma::Col<size_t> indices = arma::linspace<arma::Col<size_t> >(1,
      dataset->n_cols - 1, dataset->n_cols - 1);
  // This is now [1 2 3 ... n_cols - 1].  The root point must not occur in the
  // list: overwrite the slot that holds it with 0, so the list ends up holding
  // every index except the root point.
  if (point != 0)
    indices[point - 1] = 0;

  arma::vec distances(dataset->n_cols - 1);

  // Build the initial distances.
  ComputeDistances(point, indices, distances, dataset->n_cols - 1);

  // Create the children.
  size_t farSetSize = 0;
  size_t usedSetSize = 0;
  CreateChildren(indices, distances, dataset->n_cols - 1, farSetSize,
      usedSetSize);

  // If we ended up creating only one child, remove the implicit node by
  // adopting its children directly.
  while (children.size() == 1)
  {
    // Prepare to delete the implicit child node.
    CoverTree* old = children[0];

    // Now take its children and set their parent correctly.
    children.erase(children.begin());
    for (size_t i = 0; i < old->NumChildren(); ++i)
    {
      children.push_back(&(old->Child(i)));

      // Set its parent correctly, and rebuild the statistic.
      old->Child(i).Parent() = this;
      old->Child(i).Stat() = StatisticType(old->Child(i));
    }

    // Remove all the children so they don't get erased along with 'old'.
    old->Children().clear();

    // Reduce our own scale.
    scale = old->Scale();

    // Now delete it.
    delete old;
  }

  // Use the furthest descendant distance to determine the scale of the root
  // node.  When that distance is zero, log() yields -infinity and converting
  // it to int is undefined behavior, so saturate to the smallest scale
  // instead.
  if (furthestDescendantDistance == 0)
  {
    scale = INT_MIN;
  }
  else
  {
    scale = static_cast<int>(
        ceil(log(furthestDescendantDistance) / log(base)));
  }

  // Initialize statistic.
  stat = StatisticType(*this);

  Log::Info << distanceComps << " distance computations during tree "
      << "construction." << std::endl;
}
// One step of the dual-tree recursion: a query node against a map of
// candidate reference nodes, keyed by scale.
//
// The reference side is expanded first (ReferenceRecursion()).  Then, if the
// query node is not a leaf (scale != INT_MIN) and its scale is at least the
// largest reference scale left in the map, the traversal recurses into each
// query child with a map prepared by PruneMap() (defined elsewhere).  When the
// query node is a leaf, the remaining entries at the smallest key of the map
// are scored and, unless pruned, evaluated with BaseCase().
//
// @param queryNode Query node to process.
// @param referenceMap Candidate reference nodes grouped by scale; modified in
//     place by ReferenceRecursion().
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::Traverse(
    CoverTree<MetricType, RootPointPolicy, StatisticType>& queryNode,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& referenceMap)
{
  if (referenceMap.size() == 0)
    return; // Nothing to do!

  // First recurse down the reference nodes as necessary.
  ReferenceRecursion(queryNode, referenceMap);

  // Did the map get emptied?
  if (referenceMap.size() == 0)
    return; // Nothing to do!

  // Now, reduce the scale of the query node by recursing.  But we can't recurse
  // if the query node is a leaf node.
  if ((queryNode.Scale() != INT_MIN) &&
      (queryNode.Scale() >= (*referenceMap.rbegin()).first))
  {
    // Recurse into the non-self-children first, and into the self-child
    // (child 0) last.  The order is assumed not to affect the runtime because
    // each query child's recursion is independent; that may not hold in every
    // case, and scores might have to be considered here in the future.
    for (size_t i = 1; i < queryNode.NumChildren(); ++i)
    {
      // We need a copy of the map for this child.
      std::map<int, std::vector<DualCoverTreeMapEntry> > childMap;
      PruneMap(queryNode.Child(i), referenceMap, childMap);
      Traverse(queryNode.Child(i), childMap);
    }

    std::map<int, std::vector<DualCoverTreeMapEntry> > selfChildMap;
    PruneMap(queryNode.Child(0), referenceMap, selfChildMap);
    Traverse(queryNode.Child(0), selfChildMap);
  }

  if (queryNode.Scale() != INT_MIN)
    return; // No need to evaluate base cases at this level.  It's all done.

  // If we have made it this far, all we have is a bunch of base case
  // evaluations to do.
  Log::Assert((*referenceMap.begin()).first == INT_MIN);
  Log::Assert(queryNode.Scale() == INT_MIN);
  std::vector<DualCoverTreeMapEntry>& pointVector =
      (*referenceMap.begin()).second;

  for (size_t i = 0; i < pointVector.size(); ++i)
  {
    // Get a reference to the frame.
    const DualCoverTreeMapEntry& frame = pointVector[i];

    CoverTree<MetricType, RootPointPolicy, StatisticType>* refNode =
        frame.referenceNode;

    // If the point is the same as both parents, then we have already done this
    // base case.  A node without a parent cannot match its parent, so check
    // both parent pointers before dereferencing them; a leaf-scale root on
    // either side would otherwise be a null dereference.
    const bool bothHaveParents =
        (refNode->Parent() != NULL) && (queryNode.Parent() != NULL);
    if (bothHaveParents &&
        (refNode->Point() == refNode->Parent()->Point()) &&
        (queryNode.Point() == queryNode.Parent()->Point()))
    {
      ++numPrunes;
      continue;
    }

    // Score the node, to see if we can prune it, after restoring the traversal
    // info.
    rule.TraversalInfo() = frame.traversalInfo;
    const double score = rule.Score(queryNode, *refNode);

    if (score == DBL_MAX)
    {
      ++numPrunes;
      continue;
    }

    // If not, compute the base case.
    rule.BaseCase(queryNode.Point(), refNode->Point());
  }
}
// Expand the reference side of the traversal until the largest scale left in
// the map has come down to the query node's scale.
//
// Each pass takes the entries stored under the largest key, sorts them,
// rescores every entry against the query node and, for entries that survive,
// scores each child.  Surviving children get their base case evaluated and
// are filed in the map under their own scale.  The processed key is erased
// at the end of the pass.
//
// @param queryNode Query node the reference nodes are evaluated against.
// @param referenceMap Candidate reference nodes grouped by scale; modified in
//     place.
void CoverTree<MetricType, RootPointPolicy, StatisticType>::
DualTreeTraverser<RuleType>::ReferenceRecursion(
    CoverTree& queryNode,
    std::map<int, std::vector<DualCoverTreeMapEntry> >& referenceMap)
{
  while (!referenceMap.empty())
  {
    const int topRefScale = referenceMap.rbegin()->first;
    const int queryScale = queryNode.Scale();

    // Stopping rule, kept this way to imitate the jl cover tree: a parentless
    // query node stops once the reference scale is strictly below its own; any
    // other query node also stops when the scales are equal.
    const bool queryIsRoot = (queryNode.Parent() == NULL);
    const bool reachedQueryScale = queryIsRoot ? (topRefScale < queryScale)
                                               : (topRefScale <= queryScale);

    // When both sides already sit at INT_MIN there is nothing to recurse
    // into either.
    const bool bothAtMinimum =
        (queryScale == INT_MIN) && (topRefScale == INT_MIN);

    if (reachedQueryScale || bothAtMinimum)
      break;

    // Entries at the current largest scale, ordered by operator< before they
    // are visited.
    std::vector<DualCoverTreeMapEntry>& topEntries =
        referenceMap.rbegin()->second;
    std::sort(topEntries.begin(), topEntries.end());

    for (size_t entryIdx = 0; entryIdx < topEntries.size(); ++entryIdx)
    {
      const DualCoverTreeMapEntry& parentEntry = topEntries.at(entryIdx);
      CoverTree<MetricType, RootPointPolicy, StatisticType>* parentRef =
          parentEntry.referenceNode;

      // A rescore of DBL_MAX prunes the node together with all its children.
      const double rescored =
          rule.Rescore(queryNode, *parentRef, parentEntry.score);
      if (rescored == DBL_MAX)
      {
        ++numPrunes;
        continue;
      }

      // Not pruned: consider each child on its own.
      for (size_t childIdx = 0; childIdx < parentRef->NumChildren();
           ++childIdx)
      {
        // Every child is scored starting from the parent's traversal info.
        rule.TraversalInfo() = parentEntry.traversalInfo;

        CoverTree<MetricType, RootPointPolicy, StatisticType>& childRef =
            parentRef->Child(childIdx);

        const double childScore = rule.Score(queryNode, childRef);
        if (childScore == DBL_MAX)
        {
          ++numPrunes;
          continue;
        }

        // The child survived: evaluate its base case and record it, together
        // with its own score and the traversal info left behind by the rule.
        const double childBaseCase =
            rule.BaseCase(queryNode.Point(), childRef.Point());

        DualCoverTreeMapEntry childEntry;
        childEntry.referenceNode = &childRef;
        childEntry.score = childScore;
        childEntry.baseCase = childBaseCase;
        childEntry.traversalInfo = rule.TraversalInfo();

        referenceMap[childRef.Scale()].push_back(childEntry);
      }
    }

    // This scale has been fully expanded; drop it from the map.
    referenceMap.erase(referenceMap.rbegin()->first);
  }
}