double PellegMooreKMeans<MetricType, MatType>::Iterate( const arma::mat& centroids, arma::mat& newCentroids, arma::Col<size_t>& counts) { newCentroids.zeros(centroids.n_rows, centroids.n_cols); counts.zeros(centroids.n_cols); // Create rules object. typedef PellegMooreKMeansRules<MetricType, TreeType> RulesType; RulesType rules(dataset, centroids, newCentroids, counts, metric); // Use single-tree traverser. typename TreeType::template SingleTreeTraverser<RulesType> traverser(rules); // Now, do a traversal with a fake query index (since the query index is // irrelevant; we are checking each node with all clusters. traverser.Traverse(0, *tree); distanceCalculations += rules.DistanceCalculations(); // Now, calculate how far the clusters moved, after normalizing them. double residual = 0.0; for (size_t c = 0; c < centroids.n_cols; ++c) { if (counts[c] > 0) { newCentroids.col(c) /= counts(c); residual += std::pow(metric.Evaluate(centroids.col(c), newCentroids.col(c)), 2.0); } } distanceCalculations += centroids.n_cols; return std::sqrt(residual); }
double DualTreeKMeans<MetricType, MatType, TreeType>::Iterate( const arma::mat& centroids, arma::mat& newCentroids, arma::Col<size_t>& counts) { newCentroids.zeros(centroids.n_rows, centroids.n_cols); counts.zeros(centroids.n_cols); if (clusterDistances.n_elem != centroids.n_cols + 1) { clusterDistances.set_size(centroids.n_cols + 1); clusterDistances.fill(DBL_MAX / 2.0); // To prevent overflow. } // Build a tree on the centroids. std::vector<size_t> oldFromNewCentroids; TreeType* centroidTree = BuildTree<TreeType>( const_cast<typename TreeType::Mat&>(centroids), oldFromNewCentroids); // Now run the dual-tree algorithm. typedef DualTreeKMeansRules<MetricType, TreeType> RulesType; RulesType rules(dataset, centroids, newCentroids, counts, oldFromNewCentroids, iteration, clusterDistances, distances, assignments, distanceIteration, metric); // Use the dual-tree traverser. //typename TreeType::template DualTreeTraverser<RulesType> traverser(rules); typename TreeType::template BreadthFirstDualTreeTraverser<RulesType> traverser(rules); traverser.Traverse(*centroidTree, *tree); distanceCalculations += rules.DistanceCalculations(); // Now, calculate how far the clusters moved, after normalizing them. double residual = 0.0; clusterDistances.zeros(); for (size_t c = 0; c < centroids.n_cols; ++c) { if (counts[c] == 0) { newCentroids.col(c).fill(DBL_MAX); // Should have happened anyway I think. } else { const size_t oldCluster = oldFromNewCentroids[c]; newCentroids.col(oldCluster) /= counts(oldCluster); const double dist = metric.Evaluate(centroids.col(c), newCentroids.col(oldCluster)); if (dist > clusterDistances[centroids.n_cols]) clusterDistances[centroids.n_cols] = dist; clusterDistances[oldCluster] = dist; residual += std::pow(dist, 2.0); } } Log::Info << clusterDistances.t(); delete centroidTree; ++iteration; return std::sqrt(residual); }