/* * Cluster all the samples again. */ bool OfflineKMeans::reCluster() { bool clusterChanged = false; for(int i = 0; i < sampleNum; ++i) { const float* sample = samples.getSample(i); int oldCluster = clusterIndexes[i]; int newCluster = cluster(sample, featureNum); if(newCluster != oldCluster) { clusterChanged = true; #ifdef OKM_DBG std::cout<<"i = "<<i<<", oldCluster = "<<oldCluster<<", newCluster = " <<newCluster<<std::endl; #endif // move sample from the old cluster to the new cluster. clusters[oldCluster].second.erase(const_cast<float*>(sample)); clusters[newCluster].second.insert(const_cast<float*>(sample)); /* * set clusterIndexes[i] to newCluster to indicate now sample * indexed by 'i' is in cluster indexed by 'newCluster'. */ clusterIndexes[i] = newCluster; } } if(clusterChanged == true) updateCentroids(); return clusterChanged; }
void Clustering<N, ObjectT, DistanceP>::apply( objectList_T const & objects ) { // iteration loop truncated at maxIt for( int it = 0; it < M_maxIt; it++ ) { if( M_verbose ) std::cout << "=======================" << std::endl << "iteration " << it << std::endl; // clean up object.list for( size_t d = 0; d < N; d++ ) M_objectList[d].clear(); // loop on the object set for( size_t kObj = 0; kObj < objects.size(); kObj++ ) { // compute the distance of the k-th object from all the centroids std::array<real,N> distances; for( size_t d = 0; d < N; d++ ) { distances[d] = M_distancePolicy( objects[kObj], M_centroids[d] ); } // find the minimum distance int minPos = std::min_element( distances.begin(), distances.end() ) - distances.begin(); // insert the object in the list of the closest centroid M_objectList[minPos].push_back( objects[kObj] ); } if( M_verbose ) { for( size_t d = 0; d < N; d++ ) { std::cout << "centroid " << d << " has " << M_objectList[d].size() << " objects" << std::endl; for( size_t j = 0; j < M_objectList[d].size(); j++ ) std::cout << M_objectList[d][j] << std::endl; } } bool const isIncremented = updateCentroids(); printGnuplot( it ); if ( !isIncremented ) { if ( M_verbose ) std::cout << "iteration interrupted at " << it << std::endl; break; } } computeQuality(); }
void OfflineKMeans::initClusters() { // calculate the minimum and maximum values of each feature std::pair<float, float>* featureRanges = new std::pair<float, float>[featureNum]; for(int j = 0; j < featureNum; ++j) { float minFeatureJ = std::numeric_limits<float>::max(); float maxFeatureJ = std::numeric_limits<float>::min(); for(int i = 0; i < sampleNum; ++i) { float feature = samples.getSample(i)[j]; if(minFeatureJ > feature) minFeatureJ = feature; if(maxFeatureJ < feature) maxFeatureJ = feature; } featureRanges[j].first = minFeatureJ; featureRanges[j].second = maxFeatureJ; } // initialize centroids of clusters. clusters = new std::pair<float*, std::set<float*> >[clusterNum]; for(int i = 0; i < clusterNum; ++i) { float* centroid = new float[featureNum]; srand(i+1); float scale = static_cast<float>(rand()) / RAND_MAX; for(int j = 0; j < featureNum; ++j) { float minFeatureJ = featureRanges[j].first; float maxFeatureJ = featureRanges[j].second; centroid[j] = minFeatureJ + (maxFeatureJ - minFeatureJ) * scale; } clusters[i].first = centroid; } delete[] featureRanges; #ifdef OKM_DBG std::cout<<"Centroids of clusters initialized randomly:"<<std::endl; for(int i = 0; i < clusterNum; ++i) { std::cout<<"<"; float* centroid = clusters[i].first; int j; for(j = 0; j < featureNum-1; ++j) std::cout<<centroid[j]<<" "; std::cout<<centroid[j]<<">"<<std::endl; } #endif /* cluster all samples for the first time. * setup mapping from samples to clusters and centroids of clusters. */ clusterIndexes = new int[sampleNum]; for(int i = 0; i < sampleNum; ++i) { const float* sample = samples.getSample(i); int targetCluster = cluster(sample, featureNum); clusterIndexes[i] = targetCluster; clusters[targetCluster].second.insert(const_cast<float*>(sample)); } updateCentroids(); }