static void kmeans_center_multiple_restarts( unsigned nb_restarts, cluster_t nb_center, void (*center_init_f)(cluster_t, histogram_c &, dataset_t &, nbgen &), histogram_c ¢er, dataset_t &dataset, nbgen &rng) { std::vector<histogram_c> center_c(nb_restarts); for (unsigned i = 0; i < nb_restarts; ++i) center_init_f(nb_center, center_c[i], dataset, rng); unsigned nb_features = dataset[0].histogram.size(); std::vector<double> cluster_dists(nb_restarts); for (unsigned r = 0; r < nb_restarts; ++r) { double sum = 0; unsigned count = 0; std::vector<double> distances(nb_center, 0); for (unsigned i = 0; i < nb_center; ++i) { for (unsigned j = 0; j < nb_center; ++j) { if (j == i) continue; double dist = l2_distance(center_c[r][i], center_c[r][j], nb_features); distances[i] += dist; ++count; } sum += distances[i]; } cluster_dists[r] = sum / count; // printf("restart:%u -> %f\n", r, cluster_dists[r]); } size_t max_cluster = std::distance( cluster_dists.begin(), std::max_element(cluster_dists.begin(), cluster_dists.end())); // printf("min center index: %zu\n", max_cluster); center = center_c[max_cluster]; }
void find_optimal_clustering(Mat &dists, std::vector<idx_cluster_t> &idxClusters, const cluster_vars &vars) { int optimalClusters = floor(sqrt(dists.rows)); Mat initial; double r = vars.mcl_inflation_power; double r_dec = r - 1; double multStep = 0.6; cluster_dists(dists, initial, r, vars); interpret_clusters(initial, idxClusters); double clusterRatio = (int)idxClusters.size() / (double)optimalClusters; int iterations = vars.cluster_max_iterations; double r_used = r; if(clusterRatio < 1) { // want more clusters - larger r Mat prevmat; for(int i = 0; i < iterations; i++) { r_dec *= (1 - multStep) + 1; r_used = 1 + r_dec; Mat mclmat; cluster_dists(dists, mclmat, r_used, vars); idxClusters.clear(); interpret_clusters(mclmat, idxClusters); clusterRatio = (int)idxClusters.size() / (double)optimalClusters; //printf(" - %3d: Testing r of %6.3f and got clusterRatio of %6.3f\n", i, r_used, clusterRatio); if(clusterRatio > 1) { interpret_clusters(prevmat, idxClusters); break; } else if(clusterRatio == 1.0) { break; } else { mclmat.copyTo(prevmat); } } //printf(" - Finally using r of %6.3f to get %3d clusters from %3d hists for a clusterRatio of %6.3f\n", r_used, (int)idxClusters.size(), dists.rows, clusterRatio); } else if(clusterRatio > 1) { // want fewer clusters - smaller r for(int i = 0; i < iterations; i++) { r_dec *= multStep; r_used = 1 + r_dec; Mat mclmat; cluster_dists(dists, mclmat, r_used, vars); idxClusters.clear(); interpret_clusters(mclmat, idxClusters); clusterRatio = (int)idxClusters.size() / (double)optimalClusters; //printf(" - %3d: Testing r of %6.3f and got clusterRatio of %6.3f\n", i, r_used, clusterRatio); if(clusterRatio <= 1.0) { break; } } //printf(" - Using r of %6.3f to get %3d clusters from %3d hists for a clusterRatio of %6.3f\n", r_used, (int)idxClusters.size(), dists.rows, clusterRatio); } /* int optimalClustersMax = ceil(sqrt(dists.rows)); int optimalClustersMin = floor(sqrt(dists.rows)); int optimalCase = (int)ceil(sqrt((int)dists.rows)*2); double optimalRatio = optimalCase / (double)dists.rows; */ /* printf("Optimal Case Checks: %d\n", optimalCase); printf("Optimal Check Ratio: %.3f\n", optimalRatio); printf("Optimal Clusters: %d - %d\n\n", optimalClustersMin, optimalClustersMax); */ /* Mat initial; double r = vars.mcl_inflation_power; cluster_dists(dists, initial, r, vars); interpret_clusters(initial, idxClusters); int checkClusters = (int)idxClusters.size(); int iterations = vars.cluster_max_iterations; int base = 7; bool makeLarger = (checkClusters < optimalClustersMin); for(int i = 0; i < iterations; i++) { if(checkClusters < optimalClustersMin && makeLarger) r *= (base + 1.0 + i) / base; // need more clusters - larger r else if(checkClusters > optimalClustersMax && !makeLarger) r *= (base - 1.0 - i) / base; // need fewer clusters - smaller r else break; if(r <= 1) break; Mat mclmat; cluster_dists(dists, mclmat, r, vars); idxClusters.clear(); interpret_clusters(mclmat, idxClusters); checkClusters = (int)idxClusters.size(); } */ }