void oskmeans::clusterize() { // initialize. initialize(); if (_snippets.empty()) return; // clustering. while (!stopping_criterion()) { #ifdef DEBUG std::cerr << "[Debug]:clusterize, iteration #" << _iterations << std::endl; #endif for (short c=0; c<_K; c++) { // clear the cluster. _clusters[c].clear(); } // clear the garbage cluster. _garbage_cluster.clear(); // iterates points and associate each of them with a cluster. hash_map<uint32_t,hash_map<uint32_t,float,id_hash_uint>*,id_hash_uint>::const_iterator hit = _points.begin(); while (hit!=_points.end()) { float learning_rate = oskmeans::_nu0*pow((oskmeans::_nuf/oskmeans::_nu0),_t/static_cast<float>(_points.size()*oskmeans::_niterations)); #ifdef DEBUG std::cerr << "learning rate: " << learning_rate << std::endl; #endif // find closest cluster to this point. short cl = assign_cluster((*hit).first,(*hit).second); // recomputation of centroids/medoids. if (cl != -1) { float cl_norm = 0.0; recompute_centroid(learning_rate,&_clusters[cl]._c,(*hit).second,cl_norm); normalize_centroid(&_clusters[cl]._c,cl_norm); } ++hit; _t++; } // count iteration. _iterations++; } }
void kmeans(int n, int k){ int i, count, cluster; double percent_change; int* cluster_membership; double **new_clusters; new_clusters = (double **)malloc(k*sizeof(double *)); cluster_membership = (int *)malloc(n*sizeof(int)); count = 0; int threshold = 10; percent_change = 100; //choose first set of cluster centers for (i = 0; i < k; i++){ clusters[i][0] = data[i][0]; clusters[i][1] = data[i][1]; } //initialize new clusters variable //it has three coords: [x][y][number of members] for (i = 0; i < k; i++){ new_clusters[i] = (double *)malloc(3*sizeof(double)); new_clusters[i][0] = 0; new_clusters[i][1] = 0; new_clusters[i][2] = 0; } for (i = 0; i < n; i++){ cluster_membership[i] = -1; } while((percent_change > threshold) && (count < 100)){ percent_change = 0; for(i = 0; i < n; i++){ //find the cluster it belongs to cluster = assign_cluster(data[i][0], data[i][1], k); //check to see if this data point changed clusters if (cluster != cluster_membership[i]) { percent_change++; cluster_membership[i] = cluster; } //track the new cluster sum new_clusters[cluster][0] += data[i][0]; new_clusters[cluster][1] += data[i][1]; new_clusters[cluster][2]++; } //calculate the new cluster centers and update for (i = 0; i < k; i++){ if (new_clusters[i][2] > 0){ new_clusters[i][0] /= new_clusters[i][2]; new_clusters[i][1] /= new_clusters[i][2]; } clusters[i][0] = new_clusters[i][0]; clusters[i][1] = new_clusters[i][1]; new_clusters[i][0] = new_clusters[i][1] = new_clusters[i][2] = 0; } percent_change /= n; count++; } for (i = 0; i < n; i++){ printf("%d %d %d\n", data[i][0], data[i][1], cluster_membership[i]); } }