コード例 #1
0
int randomEMinit(double **x, int n, int p, int nclass, double *pi,
		 double **Mu, double **LTSigma)
{
  int *ordr,i,j,*clas,*nc;
  /* This is a bug. Modified by Wei-Chen Chen on 2009/03/13.
    MAKE_VECTOR(ordr,n);
  */
  MAKE_VECTOR(ordr, nclass);
  MAKE_VECTOR(clas, n);
  MAKE_VECTOR(nc, nclass);
  do {
    /* This is a bug. Modified by Wei-Chen Chen on 2009/03/13.
      i=srswor(n, n, ordr);
    */
    i=srswor(n, nclass, ordr);
    for(i=0;i<nclass;i++){
      for(j=0;j<p;j++) Mu[i][j]=x[ordr[i]][j];
    }
    for(i=0;i<n;i++) clas[i]=assign_closest(x[i],p,nclass,Mu);
    j=initials(x,n,p,nclass,nc,Mu,LTSigma,clas);
  } while (j==0);
  for(i=0;i<nclass;i++) pi[i]=1.*nc[i]/n;
  FREE_VECTOR(nc);
  FREE_VECTOR(clas);
  FREE_VECTOR(ordr);
  return 0;
}
コード例 #2
0
ファイル: kmeans.cpp プロジェクト: BastianoNove/kmeans
void kmeans(std::vector<DataPoint> data_points, int k) {
  std::vector<DataPoint> centroids = initial_centroids(data_points, k);

  int changed = data_points.size();
  double done_threshold = (k * 0.10 * data_points.size());

  while (changed > done_threshold) {
    changed = 0;

    for (std::vector<DataPoint>::iterator it = data_points.begin();
         it != data_points.end(); ++it) {
      assign_closest(centroids, *it, changed);
    }

    // Memory leak. Delete attr of old, synthetic, centroids
    centroids = recompute_centroids(data_points, centroids);
  }

  print_data_points(centroids);
  print_data_points(data_points);
}
コード例 #3
0
/* This function is called by ss_shortems().
   Mu[0, ..., labK-1] should be assigned before calling this function.
*/
void ss_randomEMinit(double **x, int n, int p, int nclass, double *pi,
   double **Mu, double **LTSigma,
   int *lab, int labK, int nonlab_total, int *lab_index){
  int *ordr, i, j, *clas, *nc;
  int new_nclass = nclass - labK;
  double labMu[labK][p];
  
  for(i = 0; i < labK; i++){
    for(j = 0; j < p; j++) labMu[i][j] = Mu[i][j];
  }
  
  /* Initial centers for all other unknown clusters. */
  MAKE_VECTOR(ordr, new_nclass);
  MAKE_VECTOR(clas, n);
  MAKE_VECTOR(nc, nclass);
  do{
    for(i = 0; i < labK; i++){
      for(j = 0; j < p; j++) Mu[i][j] = labMu[i][j];
    }
    i = srswor(nonlab_total, new_nclass, ordr);
    for(i = labK; i < nclass; i++){
      for(j = 0; j < p; j++) Mu[i][j] = x[lab_index[ordr[i - labK]]][j];
    }
    for(i = 0; i < n; i++){
      if(lab[i] == -1){
        clas[i] = assign_closest(x[i], p, nclass, Mu);
      } else{
        clas[i] = lab[i];
      }
    } 
    j = initials(x, n, p, nclass, nc, Mu, LTSigma, clas);
  } while(j == 0);
  for(i = 0; i < nclass; i++) pi[i] = 1. * nc[i] / n;
  FREE_VECTOR(nc);
  FREE_VECTOR(clas);
  FREE_VECTOR(ordr);
} /* End of ss_randomEMinit(). */
コード例 #4
0
void RunKMedoids(const leveldb::Slice& begin,
                 const leveldb::Slice& end,
                 int K, leveldb::DB* db,
                 leveldb::DB* work_db, int concurrency,
                 std::ostream& ivar_out, std::ostream& cent_out) {
  auto very_start = std::chrono::system_clock::now();
  auto key_centroids = uniform_init(begin, end, K);
  std::vector<GDELTMini> val_centroids(K);
  {
    auto it = iter(db);
    for (int i = 0; i < K; ++i) {
      it->Seek(key_centroids[i]);
      CHECK(it->Valid());
      read(it->value(), val_centroids[i]);
    }
  }

  std::cout << "Divying up range among threads... " << std::flush;
  auto parvec = get_par_ranges(uniform_init(begin, end, concurrency), db, end);
  std::cout << "DONE" << std::endl;

  int i = 0;
  bool centers_changed = true;
  vuad totals(K);
  vuai cluster_sizes(K);
  for (int i = 0; i < K; ++i) {
    cluster_sizes[i].reset(new std::atomic<int>);
    totals[i].reset(new std::atomic<double>);
  }

  while (centers_changed) {
    auto start = std::chrono::system_clock::now();
    assign_closest(parvec, totals, K, work_db, val_centroids, concurrency,
                   cluster_sizes);
    auto tot = std::accumulate(totals.begin(), totals.end(), 0.0,
                               [](double sum, typename vuad::value_type& d) {
                                 return sum + d->load();
                               });
    auto end = std::chrono::system_clock::now();
    std::cout << "Iteration " << ++i << " total intravariance " << tot;
    std::cout << "\n    Assigning medoids took " << secs(start, end)
              << "s" << std::endl;

    start = std::chrono::system_clock::now();
    for (auto& d : totals) {
      ivar_out << d->load() << " ";
    }
    ivar_out << std::endl;
    cent_out << key_centroids << std::endl;
    end = std::chrono::system_clock::now();
    std::cout << "    Saving medoids took " << secs(start, end)
              << " s" << std::endl;

    start = std::chrono::system_clock::now();
    centers_changed = update_medoids(concurrency, K, db, work_db,
                                     val_centroids, key_centroids,
                                     totals, cluster_sizes);
    end = std::chrono::system_clock::now();
    std::cout << "    Medoid update took " << secs(start, end)
              << " s" << std::endl;
    start = end;
  }
  auto very_end = std::chrono::system_clock::now();
  std::cout << "K-medoid clustering COMPLETE in "
            << secs(very_start, very_end) << " s" << std::endl;
}