Exemple #1
0
  void oskmeans::clusterize()
  {
    // initialize.
    initialize();

    if (_snippets.empty())
      return;

    // clustering.
    while (!stopping_criterion())
      {
#ifdef DEBUG
        std::cerr << "[Debug]:clusterize, iteration #" << _iterations << std::endl;
#endif

        for (short c=0; c<_K; c++)
          {
            // clear the cluster.
            _clusters[c].clear();
          }

        // clear the garbage cluster.
        _garbage_cluster.clear();

        // iterates points and associate each of them with a cluster.
        hash_map<uint32_t,hash_map<uint32_t,float,id_hash_uint>*,id_hash_uint>::const_iterator hit
        = _points.begin();
        while (hit!=_points.end())
          {
            float learning_rate = oskmeans::_nu0*pow((oskmeans::_nuf/oskmeans::_nu0),_t/static_cast<float>(_points.size()*oskmeans::_niterations));

#ifdef DEBUG
            std::cerr << "learning rate: " << learning_rate << std::endl;
#endif

            // find closest cluster to this point.
            short cl = assign_cluster((*hit).first,(*hit).second);

            // recomputation of centroids/medoids.
            if (cl != -1)
              {
                float cl_norm = 0.0;
                recompute_centroid(learning_rate,&_clusters[cl]._c,(*hit).second,cl_norm);
                normalize_centroid(&_clusters[cl]._c,cl_norm);
              }

            ++hit;
            _t++;
          }

        // count iteration.
        _iterations++;
      }
  }
Exemple #2
0
/*
 * Runs 2-D k-means over the first n rows of the file-level `data` array,
 * storing the k centres in the file-level `clusters` array, and finally
 * prints one line per point: its two coordinates and its cluster index.
 *
 * n: number of points to cluster.
 * k: number of clusters.
 *
 * The centres are seeded with the first k points. The loop stops once at
 * most 10% of the points changed cluster during a pass, or after 100
 * passes. Nothing is done (and nothing is printed) when n or k is not
 * positive, or when memory cannot be allocated.
 *
 * NOTE(review): seeding reads data[0..k-1], so this presumably requires
 * k <= n - confirm against the callers.
 */
void kmeans(int n, int k){
	int i, count, cluster;
	int changed;
	double percent_change;
	int *cluster_membership;
	double (*new_clusters)[3];	/* per cluster: [sum x][sum y][number of members] */
	const double threshold = 10;	/* percent of points allowed to change cluster */

	/* no points or no clusters: nothing to compute, and n == 0 would
	 * otherwise lead to a division by zero below */
	if (n <= 0 || k <= 0)
		return;

	/* a single contiguous block for the accumulators keeps cleanup simple */
	new_clusters = (double (*)[3])malloc((size_t)k * sizeof *new_clusters);
	cluster_membership = (int *)malloc((size_t)n * sizeof *cluster_membership);
	if (new_clusters == NULL || cluster_membership == NULL){
		fprintf(stderr, "kmeans: out of memory\n");
		free(new_clusters);
		free(cluster_membership);
		return;
	}

	count = 0;
	percent_change = 100;

	/* choose the first set of cluster centres and zero the accumulators */
	for (i = 0; i < k; i++){
		clusters[i][0] = data[i][0];
		clusters[i][1] = data[i][1];
		new_clusters[i][0] = 0;
		new_clusters[i][1] = 0;
		new_clusters[i][2] = 0;
	}

	/* -1 marks a point not yet assigned to any cluster */
	for (i = 0; i < n; i++){
		cluster_membership[i] = -1;
	}

	while ((percent_change > threshold) && (count < 100)){
		changed = 0;

		for (i = 0; i < n; i++){
			/* find the cluster this point belongs to */
			cluster = assign_cluster(data[i][0], data[i][1], k);

			/* check whether this point changed clusters */
			if (cluster != cluster_membership[i]){
				changed++;
				cluster_membership[i] = cluster;
			}

			/* track the new cluster sums */
			new_clusters[cluster][0] += data[i][0];
			new_clusters[cluster][1] += data[i][1];
			new_clusters[cluster][2]++;
		}

		/* calculate the new cluster centres and update */
		for (i = 0; i < k; i++){
			/* a cluster without members keeps its previous centre
			 * instead of being moved to the origin */
			if (new_clusters[i][2] > 0){
				clusters[i][0] = new_clusters[i][0] / new_clusters[i][2];
				clusters[i][1] = new_clusters[i][1] / new_clusters[i][2];
			}
			new_clusters[i][0] = new_clusters[i][1] = new_clusters[i][2] = 0;
		}

		/* express the change as a real percentage so that the comparison
		 * with threshold is meaningful; a plain fraction (0..1) would
		 * always be below 10 and stop the loop after the first pass */
		percent_change = 100.0 * changed / n;
		count++;
	}

	/* NOTE(review): %d assumes the elements of data are ints; if they are
	 * doubles the first two conversions should be %f - confirm against the
	 * declaration of data. */
	for (i = 0; i < n; i++){
		printf("%d %d %d\n", data[i][0], data[i][1], cluster_membership[i]);
	}

	free(cluster_membership);
	free(new_clusters);
}