Example #1
0
  void KMeans::run()
  {

    bool move;
    bool some_point_is_moving = true;
    unsigned int num_iterations = 0;
    PointId pid;
    ClusterId cid, to_cluster;
    Distance d, min;


    // Initial partition of points
    initial_partition_points();
    // Until not converge
    while (some_point_is_moving)
      {
        some_point_is_moving = false;
        compute_centroids();

        // for each point
        for (pid=0; pid<n_; ++pid)
          {
            // distance from current cluster
            ClusterId curr_ID = points_to_clusters_[pid];
            Point curr_centroid =  centroids_.row(curr_ID);
            min = distance(curr_centroid, X_.getdatai(pid));
            // for each centroid
            cid = 0;
            move = false;
            for(int k=0; k<k_;++k)
              {
                d = distance(centroids_.row(k), X_.getdatai(pid));
                if (d < min)
                  {
                    min = d;
                    move = true;
                    to_cluster = cid;
                    // remove from current cluster
                    clusters_to_points_[points_to_clusters_[pid]].erase(pid);
                    some_point_is_moving = true;
                  }
                cid++;
              }

            // move towards a closer centroid
            if (move)
              {
                // insert
                points_to_clusters_[pid] = to_cluster;
                clusters_to_points_[to_cluster].insert(pid);
              }
          }

        num_iterations++;
      }
    std::cout<<"Num Iter: "<<num_iterations<<std::endl;
  }
Example #2
0
/*
 * Clusters data.
 *
 * Sets the OpenMP thread count to NUM_THREADS and initialises the
 * NUM_THREADS entries of lock[], then repeats populate() followed by
 * compute_centroids() until again() returns zero. The pair always runs
 * at least once.
 */
static void kmeans(void)
{
	int t = 0;

	omp_set_num_threads(NUM_THREADS);
	while (t < NUM_THREADS)
	{
		omp_init_lock(&lock[t]);
		t++;
	}

	/* Cluster data: one populate/recompute round per pass. */
	for (;;)
	{
		populate();
		compute_centroids();

		if (!again())
			break;
	}
}
Example #3
0
/**
 * Builds the clusters from the users in mods0 and prints diagnostics.
 *
 * Sequence, as performed below:
 *   1. Returns early (after printing a message) when mods0 is empty.
 *   2. Clears the current state, deals the keys of mods0 round-robin into
 *      the nclusters bins and computes the centroids.
 *   3. Runs a round consisting of: optional GrahamSchmidt(1), up to
 *      niterations refinement passes (clear, re-add every user, recompute
 *      centroids, report per-cluster statistics), then dumps of the
 *      centroid comparison matrices and per-cluster sizes/norms.
 *   4. Repeats step 3 nreclusters more times.
 *
 * @param mods0         table of users; the key of each entry is what is
 *                      stored in the clusters and passed to add()
 * @param nreclusters   number of additional rounds after the first one;
 *                      zero or negative means a single round
 * @param niterations   maximum number of refinement passes per round
 * @param grahamSchmidt when true, GrahamSchmidt(1) is called at the start
 *                      of every round
 */
void Clusters::generate(RecTable & mods0, 
                        int nreclusters,
                        int niterations,
                        bool grahamSchmidt)
{
  const bool verbose = true;
  cout << "Generating clusters using " << mods0.size() << " users" << endl;

  if(!mods0.size()) {
    cout << "Bad: mods table uninitialized" << endl;
    return;
  }

  if(verbose) {
    cout << "Mods table has " << mods0.size() << " users" << endl;
    cout << "generating clusters" << endl;
  }
  
  clear();

  // Seed the bins by dealing the users out round-robin.
  // NOTE(review): the previous version carried an unreachable
  // "mods table is smaller than the number of clusters" check inside this
  // loop. If that situation must be rejected, compare mods0.size() with
  // nclusters before the loop instead.
  int bin = 0;
  for(ModsTable::iterator it = mods0.begin(); it != mods0.end(); ++it, ++bin)
    clusters[bin % nclusters].push_back((*it).first);
  
  cout << "Number of clusters: " << clusters.size() << endl;
  // centroids of the seeded bins
  compute_centroids();

  // One round per pass of this loop. The first round always runs; the
  // "> 0" test keeps a negative nreclusters from looping until overflow.
  do {
    if (grahamSchmidt) GrahamSchmidt(1);

    for(int iter = 0; iter < niterations; iter++) {
      time_t start_time = time(NULL);
      
      if(verbose) 
        cout << "Iteration: " << iter << endl;

      // wipe the existing clusters
      clear();

      if(verbose)
        cout << " --> Binning users" << endl;
      for(ModsTable::iterator user = mods0.begin();
          user != mods0.end(); ++user) 
        add((*user).first);
      
      if(verbose)
        cout << " --> Copying current centroids" << endl;
      // snapshot the centroids so the change made by this pass can be measured
      vector<User> oldcentroids;
      for(int c = 0; c < nclusters; c++) {
        oldcentroids.push_back(User());
        for(User::iterator article = centroids[c].begin();
            article != centroids[c].end(); ++article) {
          oldcentroids[c].add((*article).first,
                              centroids[c][(*article).first]);
        }
      }
      
      // recompute them from the freshly binned users
      compute_centroids();
      
      // Per-cluster report; empty clusters are left out of the averages.
      float quality=0, difference=0;
      int n_non_zero = 0;
      for(int c = 0; c < nclusters; c++) {
        if(clusters[c].empty()) continue;
        n_non_zero++;
        float q = spread[c];
        quality += q;
        float d = dist(&centroids[c], &oldcentroids[c], true);
        difference += d;
        // total number of entries held by the users of this cluster
        int n_per_user = 0;
        for(vector<int>::iterator member = clusters[c].begin();
            member != clusters[c].end(); ++member) {
          n_per_user += mods0[(*member)].size();
        }
        if(verbose) 
          cout << "\t( #" << setw(2) << c 
               << ", Q:" << setprecision(3) << setw(7)
               << q << ", Delta:" << setprecision(3) << setw(7) << d 
               << ", Users:" << setw(6) << clusters[c].size() 
               << ", Arts:" << setw(6) << centroids[c].size() 
               << ", Avg # art:" << setw(6) << float(n_per_user)/float(clusters[c].size())
               << ")" 
               << endl;
      }
      if(n_non_zero) {
        quality /= float(n_non_zero);
        difference /= float(n_non_zero);
      }
      if(verbose) 
        cout << "Quality " << quality << ", Difference " << difference 
             << ":  Total time " << difftime(time(NULL), start_time) << "sec" 
             << endl;
      // NOTE(review): stopping on a HIGH value suggests dist(..., true)
      // behaves like a similarity between old and new centroids - confirm.
      if(difference > .98) break;
    }

    // dump the self-similarity matrix for the centroids
    cout << endl << "\x1b[31mCentroid Comparison Matrix\x1b[0m" << endl;
    for(int r = 0; r < nclusters; r++) {
      printf("\t");
      for(int j = 0; j < nclusters; j++) {
        printf("%6.3f", dist(&centroids[r], &centroids[j], true));
        if(j != nclusters -1)
          printf(",");
      }
      printf("\n");
    }

    // same matrix, using dot() instead of dist()
    cout << endl << "\x1b[31mCentroid Comparison Matrix (dot)\x1b[0m" << endl;
    for(int r = 0; r < nclusters; r++) {
      printf("\t");
      for(int j = 0; j < nclusters; j++) {
        printf("%8.3f", dot(&centroids[r], &centroids[j]));
        if(j != nclusters -1)
          printf(",");
      }
      printf("\n");
    }

    // and the corresponding number of entries per cluster
    cout << endl<< "\x1b[31m Article availability\x1b[0m" << endl;
    for(int c = 0; c < nclusters; c++) {
      cout << "\tCluster #" << c << ": " << centroids[c].size() 
           << ", Norm: " << norm(&centroids[c]) << endl;
    }
  } while(nreclusters-- > 0);

}