void RangeSearchRules<MetricType, TreeType>::AddResult(const size_t queryIndex,
                                                       TreeType& referenceNode)
{
  // For trees whose first point is the centroid, the base case between the
  // query point and the node's first point may already have been evaluated
  // (and recorded) before Score() ran.  When the cached indices match, start
  // at the second descendant so that point is not added twice.
  const bool firstPointAlreadyAdded =
      tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      (queryIndex == lastQueryIndex) &&
      (referenceNode.Point(0) == lastReferenceIndex);
  const size_t firstDescendant = firstPointAlreadyAdded ? 1 : 0;

  // Reserve room for every descendant we might append.  reserve() is used
  // instead of resize() because the self-match below may be skipped, so the
  // final element count is not known up front.
  const size_t numDescendants = referenceNode.NumDescendants();
  const size_t wantedCapacity =
      neighbors[queryIndex].size() + numDescendants - firstDescendant;
  neighbors[queryIndex].reserve(wantedCapacity);
  distances[queryIndex].reserve(wantedCapacity);

  // When the query and reference sets are the same object, a point must not
  // be reported as its own neighbor.
  const bool sameSet = (&referenceSet == &querySet);

  for (size_t i = firstDescendant; i < numDescendants; ++i)
  {
    const size_t referenceIndex = referenceNode.Descendant(i);
    if (sameSet && (queryIndex == referenceIndex))
      continue;

    const double distance = metric.Evaluate(
        querySet.unsafe_col(queryIndex),
        referenceNode.Dataset().unsafe_col(referenceIndex));

    neighbors[queryIndex].push_back(referenceIndex);
    distances[queryIndex].push_back(distance);
  }
}
void CheckBound(const TreeType& tree) { typedef typename TreeType::ElemType ElemType; for (size_t i = 0; i < tree.NumDescendants(); i++) { arma::Col<ElemType> point = tree.Dataset().col(tree.Descendant(i)); // Check that the point is contained in the bound. BOOST_REQUIRE_EQUAL(true, tree.Bound().Contains(point)); const arma::Mat<ElemType>& loBound = tree.Bound().LoBound(); const arma::Mat<ElemType>& hiBound = tree.Bound().HiBound(); // Ensure that there is a hyperrectangle that contains the point. bool success = false; for (size_t j = 0; j < tree.Bound().NumBounds(); j++) { success = true; for (size_t k = 0; k < loBound.n_rows; k++) { if (point[k] < loBound(k, j) - 1e-14 * std::fabs(loBound(k, j)) || point[k] > hiBound(k, j) + 1e-14 * std::fabs(hiBound(k, j))) { success = false; break; } } if (success) break; } BOOST_REQUIRE_EQUAL(success, true); } if (!tree.IsLeaf()) { CheckBound(*tree.Left()); CheckBound(*tree.Right()); } }
void GreedySingleTreeTraverser<TreeType, RuleType>::Traverse(
    const size_t queryIndex,
    TreeType& referenceNode)
{
  // Run the base case for every point held directly by the reference node.
  for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
    rule.BaseCase(queryIndex, referenceNode.Point(i));

  // The rule is asked for its preferred child on every node (leaves too), so
  // the sequence of calls made on the rule stays the same as before.
  const size_t bestChild = rule.GetBestChild(queryIndex, referenceNode);

  // A leaf has no children to descend into; its points were handled above.
  if (referenceNode.IsLeaf())
    return;

  const size_t bestChildDescendants =
      referenceNode.Child(bestChild).NumDescendants();

  if (bestChildDescendants > minBaseCases)
  {
    // The best child alone holds more than minBaseCases descendants, so all
    // of the other children are pruned and only the best child is visited.
    numPrunes += referenceNode.NumChildren() - 1;
    Traverse(queryIndex, referenceNode.Child(bestChild));
  }
  else
  {
    // The best child is too small to descend into, so run the base case
    // directly over the leading descendants of this node (indices
    // 0..minBaseCases).  The node itself may hold fewer descendants than that,
    // so the loop is also bounded by NumDescendants() to avoid asking for a
    // descendant index that does not exist.
    const size_t available = referenceNode.NumDescendants();
    for (size_t i = 0; (i <= minBaseCases) && (i < available); ++i)
      rule.BaseCase(queryIndex, referenceNode.Descendant(i));
  }
}
inline double KDERules<MetricType, KernelType, TreeType>::
Score(TreeType& queryNode, TreeType& referenceNode)
{
  const double minDistance = queryNode.MinDistance(referenceNode);

  // For centroid-first trees, a node pair whose first points match those of
  // the previously scored pair is not evaluated again.
  const bool repeatsLastPair =
      tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      (traversalInfo.LastQueryNode() != NULL) &&
      (traversalInfo.LastReferenceNode() != NULL) &&
      (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) &&
      (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0));

  bool withinTolerance = false;
  if (repeatsLastPair)
  {
    // Remember which point pair was skipped.
    lastQueryIndex = queryNode.Point(0);
    lastReferenceIndex = referenceNode.Point(0);
  }
  else
  {
    // Kernel values at the minimum and maximum node-to-node distances, and the
    // spread between them.
    const double maxKernel = kernel.Evaluate(minDistance);
    const double minKernel =
        kernel.Evaluate(queryNode.MaxDistance(referenceNode));
    const double bound = maxKernel - minKernel;

    // The pair can be approximated when the spread fits in the per-reference-
    // point share of the allowed absolute + relative error.
    withinTolerance =
        (bound <= (absError + relError * minKernel) / referenceSet.n_cols);
  }

  double score = minDistance;
  if (withinTolerance)
  {
    // One representative kernel value stands in for every point pair.
    double kernelValue;
    if (tree::TreeTraits<TreeType>::FirstPointIsCentroid)
    {
      // The first point of each node already is its center.
      kernelValue = EvaluateKernel(queryNode.Point(0), referenceNode.Point(0));
    }
    else
    {
      // Otherwise use the centroids stored in the node statistics.
      kde::KDEStat& queryStat = queryNode.Stat();
      kde::KDEStat& referenceStat = referenceNode.Stat();
      kernelValue = EvaluateKernel(queryStat.Centroid(),
                                   referenceStat.Centroid());
    }

    // Credit every query descendant with the whole reference node at once.
    const double contribution = referenceNode.NumDescendants() * kernelValue;
    for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
      densities(queryNode.Descendant(i)) += contribution;

    // DBL_MAX is the score returned once the pair has been fully accounted
    // for here.
    score = DBL_MAX;
  }

  // Bookkeeping: count this scoring and record the pair for the next call.
  ++scores;
  traversalInfo.LastQueryNode() = &queryNode;
  traversalInfo.LastReferenceNode() = &referenceNode;
  traversalInfo.LastScore() = score;

  return score;
}
void CheckDistance(TreeType& tree, TreeType* node = NULL) { typedef typename TreeType::ElemType ElemType; if (node == NULL) { node = &tree; while (node->Parent() != NULL) node = node->Parent(); CheckDistance<TreeType, MetricType>(tree, node); for (size_t j = 0; j < tree.Dataset().n_cols; j++) { const arma::Col<ElemType>& point = tree. Dataset().col(j); ElemType maxDist = 0; ElemType minDist = std::numeric_limits<ElemType>::max(); for (size_t i = 0; i < tree.NumDescendants(); i++) { ElemType dist = MetricType::Evaluate( tree.Dataset().col(tree.Descendant(i)), tree.Dataset().col(j)); if (dist > maxDist) maxDist = dist; if (dist < minDist) minDist = dist; } BOOST_REQUIRE_LE(tree.Bound().MinDistance(point), minDist * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); BOOST_REQUIRE_LE(maxDist, tree.Bound().MaxDistance(point) * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); math::RangeType<ElemType> r = tree.Bound().RangeDistance(point); BOOST_REQUIRE_LE(r.Lo(), minDist * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); BOOST_REQUIRE_LE(maxDist, r.Hi() * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); } if (!tree.IsLeaf()) { CheckDistance<TreeType, MetricType>(*tree.Left()); CheckDistance<TreeType, MetricType>(*tree.Right()); } } else { if (&tree != node) { ElemType maxDist = 0; ElemType minDist = std::numeric_limits<ElemType>::max(); for (size_t i = 0; i < tree.NumDescendants(); i++) for (size_t j = 0; j < node->NumDescendants(); j++) { ElemType dist = MetricType::Evaluate( tree.Dataset().col(tree.Descendant(i)), node->Dataset().col(node->Descendant(j))); if (dist > maxDist) maxDist = dist; if (dist < minDist) minDist = dist; } BOOST_REQUIRE_LE(tree.Bound().MinDistance(node->Bound()), minDist * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); BOOST_REQUIRE_LE(maxDist, tree.Bound().MaxDistance(node->Bound()) * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); math::RangeType<ElemType> r = 
tree.Bound().RangeDistance(node->Bound()); BOOST_REQUIRE_LE(r.Lo(), minDist * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); BOOST_REQUIRE_LE(maxDist, r.Hi() * (1.0 + 10 * std::numeric_limits<ElemType>::epsilon())); } if (!node->IsLeaf()) { CheckDistance<TreeType, MetricType>(tree, node->Left()); CheckDistance<TreeType, MetricType>(tree, node->Right()); } } }
double RangeSearchRules<MetricType, TreeType>::Score(TreeType& queryNode,
                                                     TreeType& referenceNode)
{
  math::Range distances;

  if (!tree::TreeTraits<TreeType>::FirstPointIsCentroid)
  {
    // No centroid shortcut: take the node-to-node distance range directly.
    distances = referenceNode.RangeDistance(&queryNode);
  }
  else
  {
    // The centroid-to-centroid distance (the "base case") may already be
    // cached in a node statistic; look for it before recomputing.
    double baseCase = 0.0;
    bool alreadyDone = false;

    if (tree::TreeTraits<TreeType>::HasSelfChildren)
    {
      // The checks run in a fixed order; a later match overrides an earlier
      // one.
      TreeType* lastQuery = (TreeType*) referenceNode.Stat().LastDistanceNode();
      TreeType* lastRef = (TreeType*) queryNode.Stat().LastDistanceNode();

      // 1. The query node's previous partner shares this reference centroid.
      if ((lastRef != NULL) && (referenceNode.Point(0) == lastRef->Point(0)))
      {
        baseCase = queryNode.Stat().LastDistance();
        alreadyDone = true;
      }

      // 2. The reference node's previous partner shares this query centroid.
      if ((lastQuery != NULL) && (queryNode.Point(0) == lastQuery->Point(0)))
      {
        baseCase = referenceNode.Stat().LastDistance();
        alreadyDone = true;
      }

      // 3. The query node is a self-child (same first point as its parent)
      //    and the parent's previous partner shares this reference centroid.
      if ((queryNode.Parent() != NULL) &&
          (queryNode.Point(0) == queryNode.Parent()->Point(0)))
      {
        TreeType* lastParentRef =
            (TreeType*) queryNode.Parent()->Stat().LastDistanceNode();
        if ((lastParentRef != NULL) &&
            (referenceNode.Point(0) == lastParentRef->Point(0)))
        {
          baseCase = queryNode.Parent()->Stat().LastDistance();
          alreadyDone = true;
        }
      }

      // 4. The reference node is a self-child and the parent's previous
      //    partner shares this query centroid.
      if ((referenceNode.Parent() != NULL) &&
          (referenceNode.Point(0) == referenceNode.Parent()->Point(0)))
      {
        TreeType* lastQueryRef =
            (TreeType*) referenceNode.Parent()->Stat().LastDistanceNode();
        if ((lastQueryRef != NULL) &&
            (queryNode.Point(0) == lastQueryRef->Point(0)))
        {
          baseCase = referenceNode.Parent()->Stat().LastDistance();
          alreadyDone = true;
        }
      }
    }

    if (alreadyDone)
    {
      // Record the pair so that results are not duplicated if BaseCase() is
      // called for it.
      lastQueryIndex = queryNode.Point(0);
      lastReferenceIndex = referenceNode.Point(0);
    }
    else
    {
      // Nothing cached: evaluate the base case now.
      baseCase = BaseCase(queryNode.Point(0), referenceNode.Point(0));
    }

    // Widen the centroid distance by both nodes' furthest-descendant
    // distances to obtain the range between any pair of descendants.
    const double queryReach = queryNode.FurthestDescendantDistance();
    const double referenceReach = referenceNode.FurthestDescendantDistance();
    distances.Lo() = baseCase - queryReach - referenceReach;
    distances.Hi() = baseCase + queryReach + referenceReach;

    // Cache this evaluation on both nodes for later Score() calls.
    queryNode.Stat().LastDistanceNode() = (void*) &referenceNode;
    queryNode.Stat().LastDistance() = baseCase;
    referenceNode.Stat().LastDistanceNode() = (void*) &queryNode;
    referenceNode.Stat().LastDistance() = baseCase;
  }

  // No overlap with the search range: prune this combination.
  if (!distances.Contains(range))
    return DBL_MAX;

  // Partial overlap: the traversal must go deeper.  The returned score is
  // irrelevant because recursion order does not matter for range search.
  const bool entirelyInRange =
      (distances.Lo() >= range.Lo()) && (distances.Hi() <= range.Hi());
  if (!entirelyInRange)
    return 0.0;

  // The whole distance range lies inside the search range, so every
  // reference descendant is a result for every query descendant.
  for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
    AddResult(queryNode.Descendant(i), referenceNode);

  return DBL_MAX; // Nothing left to discover below this pair.
}