// Main loop for an instance of the algorithm. double run() { size_t i; size_t j; size_t k; printf("Initialization.\n"); init_medoids(); if(verbose) print_medoids(medoids); for(k = 0; k < clustc; ++k) { for(j = 0; j < dmatrixc; ++j) { weights[k][j] = 1.0; } } if(verbose) print_weights(weights); update_memb(); if(verbose) print_memb(memb); double prev_adeq = 0.0; double adeq = adequacy_obj(false); printf("Adequacy: %.20lf\n", adeq); double diff = fabs(adeq - prev_adeq); for(i = 1; i <= max_iter && diff > epsilon; ++i) { printf("Iteration %d.\n", i); prev_adeq = adeq; adequacy_cluster(false); update_medoids(); adeq = adequacy_cluster(true); if(verbose) { print_medoids(medoids); printf("Adequacy1: %.20lf\n", adeq); } adequacy_cluster(false); update_weights(); adeq = adequacy_cluster(true); if(verbose) { print_weights(weights); printf("Adequacy2: %.20lf\n", adeq); } adequacy_obj(false); update_memb(); adeq = adequacy_obj(true); if(verbose) print_memb(memb); printf("Adequacy: %.20lf\n", adeq); if(dgt(adeq, prev_adeq)) { printf("Warn: current adequacy is greater than " "previous iteration (%.20lf)\n", adeq - prev_adeq); } diff = fabs(adeq - prev_adeq); } printf("Adequacy difference threshold reached (%.20lf).\n", diff); return adeq; }
void RunKMedoids(const leveldb::Slice& begin, const leveldb::Slice& end, int K, leveldb::DB* db, leveldb::DB* work_db, int concurrency, std::ostream& ivar_out, std::ostream& cent_out) { auto very_start = std::chrono::system_clock::now(); auto key_centroids = uniform_init(begin, end, K); std::vector<GDELTMini> val_centroids(K); { auto it = iter(db); for (int i = 0; i < K; ++i) { it->Seek(key_centroids[i]); CHECK(it->Valid()); read(it->value(), val_centroids[i]); } } std::cout << "Divying up range among threads... " << std::flush; auto parvec = get_par_ranges(uniform_init(begin, end, concurrency), db, end); std::cout << "DONE" << std::endl; int i = 0; bool centers_changed = true; vuad totals(K); vuai cluster_sizes(K); for (int i = 0; i < K; ++i) { cluster_sizes[i].reset(new std::atomic<int>); totals[i].reset(new std::atomic<double>); } while (centers_changed) { auto start = std::chrono::system_clock::now(); assign_closest(parvec, totals, K, work_db, val_centroids, concurrency, cluster_sizes); auto tot = std::accumulate(totals.begin(), totals.end(), 0.0, [](double sum, typename vuad::value_type& d) { return sum + d->load(); }); auto end = std::chrono::system_clock::now(); std::cout << "Iteration " << ++i << " total intravariance " << tot; std::cout << "\n Assigning medoids took " << secs(start, end) << "s" << std::endl; start = std::chrono::system_clock::now(); for (auto& d : totals) { ivar_out << d->load() << " "; } ivar_out << std::endl; cent_out << key_centroids << std::endl; end = std::chrono::system_clock::now(); std::cout << " Saving medoids took " << secs(start, end) << " s" << std::endl; start = std::chrono::system_clock::now(); centers_changed = update_medoids(concurrency, K, db, work_db, val_centroids, key_centroids, totals, cluster_sizes); end = std::chrono::system_clock::now(); std::cout << " Medoid update took " << secs(start, end) << " s" << std::endl; start = end; } auto very_end = std::chrono::system_clock::now(); std::cout << "K-medoid clustering COMPLETE in " << secs(very_start, very_end) << " s" << std::endl; }