/*
 * Random initialisation for EM.
 *
 * Repeatedly picks nclass rows of x (indices supplied by srswor()) as
 * provisional centres in Mu, labels each of the n observations through
 * assign_closest(), and hands the labelling to initials().  The draw is
 * repeated for as long as initials() returns 0.  Afterwards pi[k] is set to
 * the fraction nc[k] / n for every class k.
 *
 *   x        n rows of p values each (read only here)
 *   n, p     number of observations / number of coordinates per observation
 *   nclass   number of classes
 *   pi       output, nclass mixing proportions
 *   Mu       nclass x p centres; overwritten on every attempt
 *   LTSigma  forwarded untouched to initials()
 *
 * Always returns 0.
 *
 * History (Wei-Chen Chen, 2009/03/13): the index vector used to be allocated
 * with n entries and filled with srswor(n, n, ordr); that was flagged as a bug
 * and replaced by the nclass-sized versions used below.
 */
int randomEMinit(double **x, int n, int p, int nclass, double *pi,
                 double **Mu, double **LTSigma)
{
    int *picked_rows;   /* nclass row indices into x, one per class */
    int *membership;    /* class label of each observation */
    int *class_count;   /* passed to initials(); read back to compute pi */
    int obs, k, d;
    int status;

    MAKE_VECTOR(picked_rows, nclass);
    MAKE_VECTOR(membership, n);
    MAKE_VECTOR(class_count, nclass);

    for (;;) {
        /* The return value of srswor() was never used by this routine. */
        (void) srswor(n, nclass, picked_rows);

        /* Copy the selected observations into Mu as starting centres. */
        for (k = 0; k < nclass; k++) {
            for (d = 0; d < p; d++) {
                Mu[k][d] = x[picked_rows[k]][d];
            }
        }

        /* Label every observation with the class assign_closest() reports. */
        for (obs = 0; obs < n; obs++) {
            membership[obs] = assign_closest(x[obs], p, nclass, Mu);
        }

        status = initials(x, n, p, nclass, class_count, Mu, LTSigma, membership);
        if (status != 0) {
            break;      /* a zero status means: draw again */
        }
    }

    for (k = 0; k < nclass; k++) {
        pi[k] = (double) class_count[k] / n;
    }

    FREE_VECTOR(class_count);
    FREE_VECTOR(membership);
    FREE_VECTOR(picked_rows);

    return 0;
}
void kmeans(std::vector<DataPoint> data_points, int k) { std::vector<DataPoint> centroids = initial_centroids(data_points, k); int changed = data_points.size(); double done_threshold = (k * 0.10 * data_points.size()); while (changed > done_threshold) { changed = 0; for (std::vector<DataPoint>::iterator it = data_points.begin(); it != data_points.end(); ++it) { assign_closest(centroids, *it, changed); } // Memory leak. Delete attr of old, synthetic, centroids centroids = recompute_centroids(data_points, centroids); } print_data_points(centroids); print_data_points(data_points); }
/*
 * Random EM initialisation when some observations already carry a label.
 * Called by ss_shortems().
 *
 * Precondition: Mu[0, ..., labK-1] must be assigned by the caller before this
 * function is entered.  Those rows are saved on entry and written back at the
 * start of every attempt.
 *
 * Each attempt:
 *   1. restores Mu[0 .. labK-1] from the saved copy;
 *   2. asks srswor() for (nclass - labK) indices out of nonlab_total and uses
 *      them, through lab_index, to pick rows of x as centres
 *      Mu[labK .. nclass-1];
 *   3. labels every observation: lab[i] itself unless it is -1, in which case
 *      assign_closest() decides;
 *   4. calls initials(); a return value of 0 triggers another attempt.
 *
 * On exit pi[k] = nc[k] / n for every class k.
 */
void ss_randomEMinit(double **x, int n, int p, int nclass, double *pi,
    double **Mu, double **LTSigma, int *lab, int labK, int nonlab_total,
    int *lab_index){
  const int n_unknown = nclass - labK;  /* classes without a preset centre */
  int *drawn;        /* n_unknown positions into lab_index */
  int *membership;   /* class label of each observation */
  int *class_count;  /* passed to initials(); read back to compute pi */
  int obs, k, d;
  int status;
  double preset_Mu[labK][p];  /* copy of the caller-supplied centres */

  for(k = 0; k < labK; k++){
    for(d = 0; d < p; d++){
      preset_Mu[k][d] = Mu[k][d];
    }
  }

  /* Initial centers for all other unknown clusters. */
  MAKE_VECTOR(drawn, n_unknown);
  MAKE_VECTOR(membership, n);
  MAKE_VECTOR(class_count, nclass);

  while(1){
    /* Put the preset centres back before every attempt. */
    for(k = 0; k < labK; k++){
      for(d = 0; d < p; d++){
        Mu[k][d] = preset_Mu[k][d];
      }
    }

    /* The return value of srswor() was never used by this routine. */
    (void) srswor(nonlab_total, n_unknown, drawn);

    for(k = labK; k < nclass; k++){
      const int row = lab_index[drawn[k - labK]];
      for(d = 0; d < p; d++){
        Mu[k][d] = x[row][d];
      }
    }

    /* A label of -1 means "not given": fall back to assign_closest(). */
    for(obs = 0; obs < n; obs++){
      membership[obs] = (lab[obs] == -1)
                          ? assign_closest(x[obs], p, nclass, Mu)
                          : lab[obs];
    }

    status = initials(x, n, p, nclass, class_count, Mu, LTSigma, membership);
    if(status != 0){
      break;
    }
  }

  for(k = 0; k < nclass; k++){
    pi[k] = (double) class_count[k] / n;
  }

  FREE_VECTOR(class_count);
  FREE_VECTOR(membership);
  FREE_VECTOR(drawn);
} /* End of ss_randomEMinit(). */
void RunKMedoids(const leveldb::Slice& begin, const leveldb::Slice& end, int K, leveldb::DB* db, leveldb::DB* work_db, int concurrency, std::ostream& ivar_out, std::ostream& cent_out) { auto very_start = std::chrono::system_clock::now(); auto key_centroids = uniform_init(begin, end, K); std::vector<GDELTMini> val_centroids(K); { auto it = iter(db); for (int i = 0; i < K; ++i) { it->Seek(key_centroids[i]); CHECK(it->Valid()); read(it->value(), val_centroids[i]); } } std::cout << "Divying up range among threads... " << std::flush; auto parvec = get_par_ranges(uniform_init(begin, end, concurrency), db, end); std::cout << "DONE" << std::endl; int i = 0; bool centers_changed = true; vuad totals(K); vuai cluster_sizes(K); for (int i = 0; i < K; ++i) { cluster_sizes[i].reset(new std::atomic<int>); totals[i].reset(new std::atomic<double>); } while (centers_changed) { auto start = std::chrono::system_clock::now(); assign_closest(parvec, totals, K, work_db, val_centroids, concurrency, cluster_sizes); auto tot = std::accumulate(totals.begin(), totals.end(), 0.0, [](double sum, typename vuad::value_type& d) { return sum + d->load(); }); auto end = std::chrono::system_clock::now(); std::cout << "Iteration " << ++i << " total intravariance " << tot; std::cout << "\n Assigning medoids took " << secs(start, end) << "s" << std::endl; start = std::chrono::system_clock::now(); for (auto& d : totals) { ivar_out << d->load() << " "; } ivar_out << std::endl; cent_out << key_centroids << std::endl; end = std::chrono::system_clock::now(); std::cout << " Saving medoids took " << secs(start, end) << " s" << std::endl; start = std::chrono::system_clock::now(); centers_changed = update_medoids(concurrency, K, db, work_db, val_centroids, key_centroids, totals, cluster_sizes); end = std::chrono::system_clock::now(); std::cout << " Medoid update took " << secs(start, end) << " s" << std::endl; start = end; } auto very_end = std::chrono::system_clock::now(); std::cout << "K-medoid clustering 
COMPLETE in " << secs(very_start, very_end) << " s" << std::endl; }