Esempio n. 1
0
// Main loop for an instance of the algorithm.
//
// Setup: initializes the medoids, sets every weights[k][j] (k < clustc,
// j < dmatrixc) to 1.0, updates the membership and computes the initial
// adequacy with adequacy_obj().
//
// Iteration: repeated at most max_iter times, and only while the absolute
// change in adequacy between consecutive iterations is greater than epsilon.
// Each iteration runs three update steps in order (medoids, weights,
// membership); every step is bracketed by an adequacy call with `false`
// before it and one with `true` after it.
// NOTE(review): the meaning of that boolean flag is defined by
// adequacy_cluster()/adequacy_obj(), which are not visible here; the call
// order is preserved exactly.
//
// Progress is written to stdout; extra detail is printed when `verbose` is
// set. Returns the adequacy produced by the last membership update (or the
// initial adequacy when no iteration runs).
double run() {
	size_t i;
	size_t j;
	size_t k;
	printf("Initialization.\n");
	init_medoids();
	if(verbose) print_medoids(medoids);
	for(k = 0; k < clustc; ++k) {
		for(j = 0; j < dmatrixc; ++j) {
			weights[k][j] = 1.0;
		}
	}
	if(verbose) print_weights(weights);
	update_memb();
	if(verbose) print_memb(memb);
	double prev_adeq = 0.0;
	double adeq = adequacy_obj(false);
	printf("Adequacy: %.20lf\n", adeq);
	// prev_adeq starts at 0, so the first difference is simply |adeq|.
	double diff = fabs(adeq - prev_adeq);
	for(i = 1; i <= max_iter && diff > epsilon; ++i) {
		// i is a size_t: it must be printed with %zu, not %d.
		printf("Iteration %zu.\n", i);
		prev_adeq = adeq;

		// Step 1: medoids.
		adequacy_cluster(false);
		update_medoids();
		adeq = adequacy_cluster(true);
		if(verbose) {
			print_medoids(medoids);
			printf("Adequacy1: %.20lf\n", adeq);
		}

		// Step 2: weights.
		adequacy_cluster(false);
		update_weights();
		adeq = adequacy_cluster(true);
		if(verbose) {
			print_weights(weights);
			printf("Adequacy2: %.20lf\n", adeq);
		}

		// Step 3: membership.
		adequacy_obj(false);
		update_memb();
		adeq = adequacy_obj(true);
		if(verbose) print_memb(memb);
		printf("Adequacy: %.20lf\n", adeq);

		if(dgt(adeq, prev_adeq)) {
			printf("Warn: current adequacy is greater than "
					"previous iteration (%.20lf)\n",
					adeq - prev_adeq);
		}
		diff = fabs(adeq - prev_adeq);
	}
	// The loop ends either because the change dropped to epsilon or below,
	// or because max_iter iterations were used up; report which one it was.
	if(diff > epsilon) {
		printf("Maximum number of iterations reached "
				"(adequacy difference: %.20lf).\n",
				diff);
	} else {
		printf("Adequacy difference threshold reached (%.20lf).\n",
				diff);
	}
	return adeq;
}
// Runs K-medoid clustering over the key range [begin, end] of `db`.
//
// Steps, as visible in this function:
//   1. K initial medoid keys are chosen with uniform_init(begin, end, K) and
//      their values are read from `db` into val_centroids (CHECK-ed to
//      exist).
//   2. The key range is divided into per-thread ranges with get_par_ranges().
//   3. Until update_medoids() reports that no center changed:
//        - assign_closest() fills the per-cluster totals and sizes,
//        - the per-cluster totals are written to `ivar_out` (one line per
//          iteration, space separated) and the current medoid keys to
//          `cent_out`,
//        - update_medoids() recomputes the medoids.
//
// Timing and progress information is printed to std::cout.
//
// NOTE(review): `work_db` and `concurrency` are only forwarded to the helper
// functions; their exact contract is defined there.
void RunKMedoids(const leveldb::Slice& begin,
                 const leveldb::Slice& end,
                 int K, leveldb::DB* db,
                 leveldb::DB* work_db, int concurrency,
                 std::ostream& ivar_out, std::ostream& cent_out) {
  auto very_start = std::chrono::system_clock::now();
  auto key_centroids = uniform_init(begin, end, K);
  std::vector<GDELTMini> val_centroids(K);
  {
    // Scoped so the iterator is released before the clustering loop starts.
    auto it = iter(db);
    for (int c = 0; c < K; ++c) {
      it->Seek(key_centroids[c]);
      CHECK(it->Valid());
      read(it->value(), val_centroids[c]);
    }
  }

  std::cout << "Divying up range among threads... " << std::flush;
  auto parvec = get_par_ranges(uniform_init(begin, end, concurrency), db, end);
  std::cout << "DONE" << std::endl;

  int iteration = 0;
  bool centers_changed = true;
  vuad totals(K);
  vuai cluster_sizes(K);
  for (int c = 0; c < K; ++c) {
    // Value-initialize explicitly: a default-initialized std::atomic holds an
    // indeterminate value before C++20.
    cluster_sizes[c].reset(new std::atomic<int>(0));
    totals[c].reset(new std::atomic<double>(0.0));
  }

  while (centers_changed) {
    // `stop` (not `end`) so the `end` parameter is not shadowed.
    auto start = std::chrono::system_clock::now();
    assign_closest(parvec, totals, K, work_db, val_centroids, concurrency,
                   cluster_sizes);
    auto tot = std::accumulate(
        totals.begin(), totals.end(), 0.0,
        [](double sum, const typename vuad::value_type& d) {
          return sum + d->load();
        });
    auto stop = std::chrono::system_clock::now();
    std::cout << "Iteration " << ++iteration << " total intravariance " << tot;
    std::cout << "\n    Assigning medoids took " << secs(start, stop)
              << "s" << std::endl;

    start = std::chrono::system_clock::now();
    for (auto& d : totals) {
      ivar_out << d->load() << " ";
    }
    ivar_out << std::endl;
    cent_out << key_centroids << std::endl;
    stop = std::chrono::system_clock::now();
    std::cout << "    Saving medoids took " << secs(start, stop)
              << " s" << std::endl;

    start = std::chrono::system_clock::now();
    centers_changed = update_medoids(concurrency, K, db, work_db,
                                     val_centroids, key_centroids,
                                     totals, cluster_sizes);
    stop = std::chrono::system_clock::now();
    std::cout << "    Medoid update took " << secs(start, stop)
              << " s" << std::endl;
  }
  auto very_end = std::chrono::system_clock::now();
  std::cout << "K-medoid clustering COMPLETE in "
            << secs(very_start, very_end) << " s" << std::endl;
}