template<typename MetricType, typename KernelType, typename TreeType>
inline double KDERules<MetricType, KernelType, TreeType>::
Score(const size_t queryIndex, TreeType& referenceNode)
{
  double score, maxKernel, minKernel, bound;
  const arma::vec& queryPoint = querySet.unsafe_col(queryIndex);
  const double minDistance = referenceNode.MinDistance(queryPoint);
  bool newCalculations = true;

  if (tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      lastQueryIndex == queryIndex &&
      traversalInfo.LastReferenceNode() != NULL &&
      traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0))
  {
    // Don't duplicate calculations.
    newCalculations = false;
    lastQueryIndex = queryIndex;
    lastReferenceIndex = referenceNode.Point(0);
  }
  else
  {
    // Calculations are new.
    maxKernel = kernel.Evaluate(minDistance);
    minKernel = kernel.Evaluate(referenceNode.MaxDistance(queryPoint));
    bound = maxKernel - minKernel;
  }

  if (newCalculations &&
      bound <= (absError + relError * minKernel) / referenceSet.n_cols)
  {
    // Estimate values.
    double kernelValue;

    // Calculate kernel value based on reference node centroid.
    if (tree::TreeTraits<TreeType>::FirstPointIsCentroid)
    {
      kernelValue = EvaluateKernel(queryIndex, referenceNode.Point(0));
    }
    else
    {
      kde::KDEStat& referenceStat = referenceNode.Stat();
      kernelValue = EvaluateKernel(queryPoint, referenceStat.Centroid());
    }

    densities(queryIndex) += referenceNode.NumDescendants() * kernelValue;

    // Don't explore this tree branch.
    score = DBL_MAX;
  }
  else
  {
    score = minDistance;
  }

  ++scores;
  traversalInfo.LastReferenceNode() = &referenceNode;
  traversalInfo.LastScore() = score;
  return score;
}
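// A minimal standalone sketch (not part of the sources above) of the pruning
// test used in Score(): if the gap between the largest and smallest kernel
// values any descendant of the node could contribute is within the per-point
// error budget, the whole node can be approximated with a single kernel
// evaluation.  The GaussianKernel, the bandwidth of 1.0, and the name
// CanPruneNode are assumptions chosen for illustration.
#include <mlpack/core/kernels/gaussian_kernel.hpp>

inline bool CanPruneNode(const double minDistance,
                         const double maxDistance,
                         const double absError,
                         const double relError,
                         const size_t referenceSetSize)
{
  mlpack::kernel::GaussianKernel kernel(1.0); // Assumed bandwidth.
  const double maxKernel = kernel.Evaluate(minDistance);
  const double minKernel = kernel.Evaluate(maxDistance);
  const double bound = maxKernel - minKernel;

  // Same form of test as above: the absolute and relative error tolerances
  // are split evenly across all reference points.
  return bound <= (absError + relError * minKernel) / referenceSetSize;
}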
template<typename MetricType, typename TreeType>
double DTBRules<MetricType, TreeType>::Score(TreeType& queryNode,
                                             TreeType& referenceNode)
{
  // If all the queries belong to the same component as all the references,
  // then we prune.
  if ((queryNode.Stat().ComponentMembership() >= 0) &&
      (queryNode.Stat().ComponentMembership() ==
           referenceNode.Stat().ComponentMembership()))
    return DBL_MAX;

  ++scores;
  const double distance = queryNode.MinDistance(referenceNode);
  const double bound = CalculateBound(queryNode);

  // If all the points in the reference node are farther than the candidate
  // nearest neighbor for all queries in the node, we prune.
  return (bound < distance) ? DBL_MAX : distance;
}
template<typename MetricType, typename TreeType>
double DTBRules<MetricType, TreeType>::Score(const size_t queryIndex,
                                             TreeType& referenceNode)
{
  size_t queryComponentIndex = connections.Find(queryIndex);

  // If the query belongs to the same component as all of the references,
  // then prune.  The cast is to stop a warning about comparing unsigned to
  // signed values.
  if (queryComponentIndex ==
      (size_t) referenceNode.Stat().ComponentMembership())
    return DBL_MAX;

  const arma::vec queryPoint = dataSet.unsafe_col(queryIndex);
  const double distance = referenceNode.MinDistance(queryPoint);

  // If all the points in the reference node are farther than the candidate
  // nearest neighbor for the query's component, we prune.
  return (neighborsDistances[queryComponentIndex] < distance) ?
      DBL_MAX : distance;
}
template<typename MetricType, typename TreeType>
double DTBRules<MetricType, TreeType>::Score(const size_t queryIndex,
                                             TreeType& referenceNode,
                                             const double baseCaseResult)
{
  // This overload is the same as the one above, except that the
  // already-computed base case result (the distance between the query point
  // and the reference node's first point) is passed along to MinDistance(),
  // which can use it to compute the bound more cheaply for trees whose first
  // point is the centroid.
  size_t queryComponentIndex = connections.Find(queryIndex);

  // If the query belongs to the same component as all of the references,
  // then prune.  The cast stops a warning about comparing unsigned to signed
  // values.
  if (queryComponentIndex ==
      (size_t) referenceNode.Stat().ComponentMembership())
    return DBL_MAX;

  const arma::vec queryPoint = dataSet.unsafe_col(queryIndex);
  const double distance = referenceNode.MinDistance(queryPoint,
                                                    baseCaseResult);

  // If all the points in the reference node are farther than the candidate
  // nearest neighbor for the query's component, we prune.
  return (neighborsDistances[queryComponentIndex] < distance) ?
      DBL_MAX : distance;
}
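// A minimal sketch (assumed names, not taken from the sources above) of the
// two prunes both single-query Score() overloads apply: a reference node is
// skipped when it cannot contribute an inter-component edge, or when even
// its closest point is farther away than the best candidate edge already
// found for the query's component.
#include <armadillo>
#include <limits>

inline double DtbScoreSketch(const size_t queryComponent,
                             const long nodeComponent,  // -1 if the node
                                                        // spans components.
                             const double minDistance,  // Node lower bound.
                             const arma::vec& bestEdgeDistances)
{
  // Prune 1: every descendant is already connected to the query.
  if (nodeComponent >= 0 && (size_t) nodeComponent == queryComponent)
    return std::numeric_limits<double>::max();

  // Prune 2: the node cannot improve this component's best candidate edge.
  if (bestEdgeDistances[queryComponent] < minDistance)
    return std::numeric_limits<double>::max();

  return minDistance;
}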
template<typename MetricType, typename TreeType>
double PellegMooreKMeansRules<MetricType, TreeType>::Score(
    const size_t /* queryIndex */,
    TreeType& referenceNode)
{
  // Obtain the parent's blacklist.  If this is the root node, we'll start
  // with an empty blacklist.  This means that after each iteration, we don't
  // need to reset any statistics.
  if (referenceNode.Parent() == NULL ||
      referenceNode.Parent()->Stat().Blacklist().n_elem == 0)
    referenceNode.Stat().Blacklist().zeros(centroids.n_cols);
  else
    referenceNode.Stat().Blacklist() =
        referenceNode.Parent()->Stat().Blacklist();

  // The query index is a fake index that we won't use, and the reference node
  // holds all of the points in the dataset.  Our goal is to determine whether
  // or not this node is dominated by a single cluster.
  const size_t whitelisted = centroids.n_cols -
      arma::accu(referenceNode.Stat().Blacklist());

  distanceCalculations += whitelisted;

  // Which cluster has minimum distance to the node?
  size_t closestCluster = centroids.n_cols;
  double minMinDistance = DBL_MAX;
  for (size_t i = 0; i < centroids.n_cols; ++i)
  {
    if (referenceNode.Stat().Blacklist()[i] == 0)
    {
      const double minDistance = referenceNode.MinDistance(centroids.col(i));
      if (minDistance < minMinDistance)
      {
        minMinDistance = minDistance;
        closestCluster = i;
      }
    }
  }

  // Now, for every other whitelisted cluster, determine if the closest
  // cluster owns the point.  This calculation is specific to hyperrectangle
  // trees (but, this implementation is specific to kd-trees, so that's okay).
  // For circular-bound trees, the condition should be simpler and can
  // probably be expressed as a comparison between minimum and maximum
  // distances.
  size_t newBlacklisted = 0;
  for (size_t c = 0; c < centroids.n_cols; ++c)
  {
    if (referenceNode.Stat().Blacklist()[c] == 1 || c == closestCluster)
      continue;

    // This algorithm comes from the proof of Lemma 4 in the extended version
    // of the Pelleg-Moore paper (the CMU tech report, that is).  It has been
    // adapted for speed.
    arma::vec cornerPoint(centroids.n_rows);
    for (size_t d = 0; d < referenceNode.Bound().Dim(); ++d)
    {
      if (centroids(d, c) > centroids(d, closestCluster))
        cornerPoint(d) = referenceNode.Bound()[d].Hi();
      else
        cornerPoint(d) = referenceNode.Bound()[d].Lo();
    }

    const double closestDist = metric.Evaluate(cornerPoint,
        centroids.col(closestCluster));
    const double otherDist = metric.Evaluate(cornerPoint, centroids.col(c));

    distanceCalculations += 3; // One for cornerPoint, then two distances.

    if (closestDist < otherDist)
    {
      // The closest cluster dominates the node with respect to the cluster c.
      // So we can blacklist c.
      referenceNode.Stat().Blacklist()[c] = 1;
      ++newBlacklisted;
    }
  }

  if (whitelisted - newBlacklisted == 1)
  {
    // This node is dominated by the closest cluster.
    counts[closestCluster] += referenceNode.NumDescendants();
    newCentroids.col(closestCluster) += referenceNode.NumDescendants() *
        referenceNode.Stat().Centroid();

    return DBL_MAX;
  }

  // Perform the base case here.
  for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
  {
    size_t bestCluster = centroids.n_cols;
    double bestDistance = DBL_MAX;
    for (size_t c = 0; c < centroids.n_cols; ++c)
    {
      if (referenceNode.Stat().Blacklist()[c] == 1)
        continue;

      ++distanceCalculations;

      // The reference index is the index of the data point.
      const double distance = metric.Evaluate(centroids.col(c),
          dataset.col(referenceNode.Point(i)));

      if (distance < bestDistance)
      {
        bestDistance = distance;
        bestCluster = c;
      }
    }

    // Add to resulting centroid.
    newCentroids.col(bestCluster) += dataset.col(referenceNode.Point(i));
    ++counts(bestCluster);
  }

  // Otherwise, we're not sure, so we can't prune.  Recursion order doesn't
  // make a difference, so we'll just return a score of 0.
  return 0.0;
}
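// A minimal sketch (assumed names, not taken from the sources above) of the
// corner-point domination test used in the loop over competing centroids.
// For each dimension we pick the corner of the node's bounding hyperrectangle
// that is most favorable to the competing centroid; if even that corner is
// closer to the owning centroid, the competitor can never claim any point in
// the node and can be blacklisted.
#include <armadillo>

inline bool OwnerDominates(const arma::vec& lo,         // Bound lower corner.
                           const arma::vec& hi,         // Bound upper corner.
                           const arma::vec& owner,      // Closest centroid.
                           const arma::vec& competitor) // Centroid to test.
{
  arma::vec corner(lo.n_elem);
  for (size_t d = 0; d < lo.n_elem; ++d)
    corner(d) = (competitor(d) > owner(d)) ? hi(d) : lo(d);

  return arma::norm(corner - owner, 2) < arma::norm(corner - competitor, 2);
}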
template<typename MetricType, typename KernelType, typename TreeType>
inline double KDERules<MetricType, KernelType, TreeType>::
Score(TreeType& queryNode, TreeType& referenceNode)
{
  double score, maxKernel, minKernel, bound;
  const double minDistance = queryNode.MinDistance(referenceNode);
  // Calculations are not duplicated.
  bool newCalculations = true;

  if (tree::TreeTraits<TreeType>::FirstPointIsCentroid &&
      (traversalInfo.LastQueryNode() != NULL) &&
      (traversalInfo.LastReferenceNode() != NULL) &&
      (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) &&
      (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0)))
  {
    // Don't duplicate calculations.
    newCalculations = false;
    lastQueryIndex = queryNode.Point(0);
    lastReferenceIndex = referenceNode.Point(0);
  }
  else
  {
    // Calculations are new.
    maxKernel = kernel.Evaluate(minDistance);
    minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode));
    bound = maxKernel - minKernel;
  }

  // If possible, avoid some calculations because of the error tolerance.
  if (newCalculations &&
      bound <= (absError + relError * minKernel) / referenceSet.n_cols)
  {
    // Auxiliary variables.
    double kernelValue;
    kde::KDEStat& referenceStat = referenceNode.Stat();
    kde::KDEStat& queryStat = queryNode.Stat();

    // If calculating a center is not required.
    if (tree::TreeTraits<TreeType>::FirstPointIsCentroid)
    {
      kernelValue = EvaluateKernel(queryNode.Point(0), referenceNode.Point(0));
    }
    // Sadly, we have no choice but to calculate the center.
    else
    {
      kernelValue = EvaluateKernel(queryStat.Centroid(),
                                   referenceStat.Centroid());
    }

    // Sum up estimations.
    for (size_t i = 0; i < queryNode.NumDescendants(); ++i)
    {
      densities(queryNode.Descendant(i)) +=
          referenceNode.NumDescendants() * kernelValue;
    }
    score = DBL_MAX;
  }
  else
  {
    score = minDistance;
  }

  ++scores;
  traversalInfo.LastQueryNode() = &queryNode;
  traversalInfo.LastReferenceNode() = &referenceNode;
  traversalInfo.LastScore() = score;
  return score;
}
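// The Score() overloads above only accumulate raw kernel sums into
// `densities`.  A minimal sketch (mirroring the usual KDE definition, not
// taken from the sources above) of a final normalization step: divide by the
// kernel's normalizing constant and by the number of reference points so
// each accumulated sum approximates a proper density value.
#include <mlpack/core/kernels/gaussian_kernel.hpp>
#include <armadillo>

inline void NormalizeDensities(arma::vec& densities,
                               const size_t referenceSetSize,
                               const size_t dimensionality,
                               mlpack::kernel::GaussianKernel& kernel)
{
  densities /= (kernel.Normalizer(dimensionality) * referenceSetSize);
}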