/**
 * Iterative k-means clustering that starts from caller-supplied centers.
 *
 * Every point is assigned to the center with the smallest value of
 * s_distanceSquared(); ties go to the lowest center index. Each center that
 * received at least one point is then replaced by s_calcCenter() of its
 * members. The assign/update cycle repeats until an assignment is identical
 * to the one produced by the previous cycle.
 *
 * @param points   The data points; must be non-empty, and every point must
 *                 have the same non-zero number of dimensions.
 * @param k        The number of clusters; must be non-zero.
 * @param centers  In: exactly k initial centers with the same dimensionality
 *                 as the points. Out: the centers as last updated.
 * @return         k lists; list c holds the indices (into points) of the
 *                 points assigned to center c.
 * @throws eInvalidArgument if any of the requirements above is violated.
 */
vector< vector<u32> > kmeans(const vector< vector<f64> >& points, u32 k, vector< vector<f64> >& centers)
{
    // ---- Argument validation (checked in this exact order) ----
    if (points.empty())
        throw eInvalidArgument("There must be points to cluster!");

    const size_t dimensions = points[0].size();
    if (dimensions == 0)
        throw eInvalidArgument("There must be at least one dimension!");

    for (size_t p = 1; p < points.size(); p++)
    {
        if (points[p].size() != dimensions)
            throw eInvalidArgument("All data points must have the same dimensionality.");
    }

    if (k == 0)
        throw eInvalidArgument("Clustering into zero clusters makes no sense.");

    if (static_cast<u32>(centers.size()) != k)
        throw eInvalidArgument("You must supply k initial centers to this version of kmeans().");

    for (u32 c = 0; c < k; c++)
    {
        if (centers[c].size() != dimensions)
            throw eInvalidArgument("All initial centers must have the same dimensionality as the data points.");
    }

    // ---- Assign / update until the assignment stops changing ----
    vector< vector<u32> > assignment(k);

    for (;;)
    {
        vector< vector<u32> > nextAssignment(k);

        // Assignment step: attach each point to its nearest center.
        for (size_t p = 0; p < points.size(); p++)
        {
            u32 nearest = 0;
            f64 nearestDist = s_distanceSquared(points[p], centers[0]);

            for (u32 c = 1; c < k; c++)
            {
                const f64 candidateDist = s_distanceSquared(points[p], centers[c]);
                if (candidateDist < nearestDist)
                {
                    // Strict '<' keeps the lowest-indexed center on ties.
                    nearest = c;
                    nearestDist = candidateDist;
                }
            }

            nextAssignment[nearest].push_back(static_cast<u32>(p));
        }

        // Update step: recompute the center of every non-empty cluster.
        // A cluster that received no points keeps its previous center
        // (whether it should be re-seeded instead is an open question).
        for (u32 c = 0; c < k; c++)
        {
            if (nextAssignment[c].empty())
                continue;
            centers[c] = s_calcCenter(points, nextAssignment[c]);
        }

        // Converged once two consecutive assignments are identical.
        if (assignment == nextAssignment)
            break;

        assignment = nextAssignment;
    }

    return assignment;
}
/**
 * Builds the algorithm state from the given parameter set.
 *
 * Initialises _parameters from para and _bestClusters from
 * (para.N, para.atomTypes, para.atomNumber), then fills _X, _V and _U with
 * para._numberOfClusters entries each.
 * NOTE(review): _X / _V / _U look like the population, mutant and trial
 * sets of a differential-evolution scheme -- confirm against the class
 * declaration.
 *
 * @param para  Parameters supplying the cluster dimensions and the number
 *              of clusters to create.
 */
DEBaseAlgorithm::DEBaseAlgorithm(DEBaseParameters& para)
    : _parameters(para),
      _bestClusters(para.N, para.atomTypes, para.atomNumber)
{
    for (int member = 0; member < para._numberOfClusters; member++)
    {
        // A new DEBaseClusters is constructed on every pass; the three
        // containers each receive their own copy of that same object.
        DEBaseClusters initial(para.N, para.atomTypes, para.atomNumber);

        _X.push_back(initial);
        _V.push_back(initial);
        _U.push_back(initial);
    }
}
// Zhu et al. "A Rank-Order Distance based Clustering Algorithm for Face Tagging", CVPR 2011 br::Clusters br::ClusterGallery(const QStringList &simmats, float aggressiveness, const QString &csv) { qDebug("Clustering %d simmat(s)", simmats.size()); // Read in gallery parts, keeping top neighbors of each template Neighborhood neighborhood = getNeighborhood(simmats); const int cutoff = neighborhood.first().size(); const float threshold = 3*cutoff/4 * aggressiveness/5; // Initialize clusters Clusters clusters(neighborhood.size()); for (int i=0; i<neighborhood.size(); i++) clusters[i].append(i); bool done = false; while (!done) { // nextClusterIds[i] = j means that cluster i is set to merge into cluster j QVector<int> nextClusterIDs(neighborhood.size()); for (int i=0; i<neighborhood.size(); i++) nextClusterIDs[i] = i; // For each cluster for (int clusterID=0; clusterID<neighborhood.size(); clusterID++) { const Neighbors &neighbors = neighborhood[clusterID]; int nextClusterID = nextClusterIDs[clusterID]; // Check its neighbors foreach (const Neighbor &neighbor, neighbors) { int neighborID = neighbor.first; int nextNeighborID = nextClusterIDs[neighborID]; // Don't bother if they have already merged if (nextNeighborID == nextClusterID) continue; // Flag for merge if similar enough if (normalizedROD(neighborhood, clusterID, neighborID) < threshold) { if (nextClusterID < nextNeighborID) nextClusterIDs[neighborID] = nextClusterID; else nextClusterIDs[clusterID] = nextNeighborID; } } } // Transitive merge for (int i=0; i<neighborhood.size(); i++) { int nextClusterID = i; while (nextClusterID != nextClusterIDs[nextClusterID]) { assert(nextClusterIDs[nextClusterID] < nextClusterID); nextClusterID = nextClusterIDs[nextClusterID]; } nextClusterIDs[i] = nextClusterID; } // Construct new clusters QHash<int, int> clusterIDLUT; QList<int> allClusterIDs = QSet<int>::fromList(nextClusterIDs.toList()).values(); for (int i=0; i<neighborhood.size(); i++) clusterIDLUT[i] = 
allClusterIDs.indexOf(nextClusterIDs[i]); Clusters newClusters(allClusterIDs.size()); Neighborhood newNeighborhood(allClusterIDs.size()); for (int i=0; i<neighborhood.size(); i++) { int newID = clusterIDLUT[i]; newClusters[newID].append(clusters[i]); newNeighborhood[newID].append(neighborhood[i]); } // Update indices and trim for (int i=0; i<newNeighborhood.size(); i++) { Neighbors &neighbors = newNeighborhood[i]; int size = qMin(neighbors.size(),cutoff); std::partial_sort(neighbors.begin(), neighbors.begin()+size, neighbors.end(), compareNeighbors); for (int j=0; j<size; j++) neighbors[j].first = clusterIDLUT[j]; neighbors = neighbors.mid(0, cutoff); } // Update results done = true; //(newClusters.size() >= clusters.size()); clusters = newClusters; neighborhood = newNeighborhood; }