Example #1
0
// Builds the k-means centers of this codebook from a block of float samples.
//
//   data      pointer to the sample buffer; must not be NULL
//   num_data  number of samples in `data`; must be at least K
//   dim       dimensionality passed to the center initializer
//   K         number of centers to generate
//
// On invalid input an error is printed to stderr and the process exits
// with status -1.
void CodeBook::GenKMeans(const float* data, const uint32_t num_data,
                         const uint32_t dim, const uint32_t K)
{
    // A missing sample buffer is fatal.
    if (!data)
    {
        fprintf(stderr, "NULL pointer for data\n");
        exit(-1);
    }

    // Asking for more centers than there are samples is fatal as well.
    if (K > num_data)
    {
        fprintf(stderr, "number of data must be equal or greater than centers\n");
        exit(-1);
    }

    // Run SetUp() first when has_setup_ is not set.
    if (!has_setup_)
    {
        SetUp();
    }

    // Pick the initial centers from the samples, then refine them.
    vl_kmeans_init_centers_with_rand_data(kmeans_model_, data, dim, num_data, K);
    vl_kmeans_refine_centers(kmeans_model_, data, num_data);
}
Example #2
0
/** Run K-means with restarts and keep the best solution found.
 **
 ** For each of self->numRepetitions repetitions the centers are seeded
 ** according to self->initialization (random data selection or
 ** k-means++) and then refined with ::vl_kmeans_refine_centers. The
 ** centers of the repetition with the smallest energy are left in
 ** self->centers.
 **
 ** @param self       KMeans object; its centers are replaced.
 ** @param data       data to cluster.
 ** @param dimension  data dimension, forwarded to the seeding routine.
 ** @param numData    number of data points.
 ** @param numCenters number of centers to seed.
 ** @return energy of the best solution; VL_INFINITY_D if no repetition ran.
 **
 ** An unknown self->initialization value aborts the process.
 **/
VL_EXPORT double
vl_kmeans_cluster (VlKMeans * self,
                   void const * data,
                   vl_size dimension,
                   vl_size numData,
                   vl_size numCenters)
{
  vl_uindex repetition ;
  double bestEnergy = VL_INFINITY_D ;
  void * bestCenters = NULL ;

  for (repetition = 0 ; repetition < self->numRepetitions ; ++ repetition) {
    double energy ;
    double timeRef ;

    if (self->verbosity) {
      /* The counters are not int: cast explicitly so that the arguments
         match the conversion specifiers of the variadic call. */
      VL_PRINTF("kmeans: repetition %llu of %llu\n",
                (unsigned long long) (repetition + 1),
                (unsigned long long) self->numRepetitions) ;
    }

    /* seed the centers */
    timeRef = vl_get_cpu_time() ;
    switch (self->initialization) {
      case VlKMeansRandomSelection :
        vl_kmeans_seed_centers_with_rand_data (self,
                                               data, dimension, numData,
                                               numCenters) ;
        break ;
      case VlKMeansPlusPlus :
        vl_kmeans_seed_centers_plus_plus (self,
                                          data, dimension, numData,
                                          numCenters) ;
        break ;
      default:
        abort() ;
    }

    if (self->verbosity) {
      VL_PRINTF("kmeans: K-means initialized in %.2f s\n",
                vl_get_cpu_time() - timeRef) ;
    }

    /* refine the centers */
    timeRef = vl_get_cpu_time () ;
    energy = vl_kmeans_refine_centers (self, data, numData) ;
    if (self->verbosity) {
      VL_PRINTF("kmeans: K-means termineted in %.2f s with energy %g\n",
                vl_get_cpu_time() - timeRef, energy) ;
    }

    /* Keep the centers if the current solution is the best so far.
       The first repetition is always kept: if the energy is NaN (e.g.
       the data contain NaNs) the comparison below is never true, and
       without this bestCenters would stay NULL and self->centers would
       end up NULL on return. */
    if (energy < bestEnergy || repetition == 0) {
      void * temp ;
      bestEnergy = energy ;

      if (bestCenters == NULL) {
        bestCenters = vl_malloc(vl_get_type_size(self->dataType) *
                                self->dimension *
                                self->numCenters) ;
      }

      /* swap buffers: bestCenters takes the refined centers and the
         spare buffer becomes the working buffer of the model */
      temp = bestCenters ;
      bestCenters = self->centers ;
      self->centers = temp ;
    } /* better energy */
  } /* next repetition */

  /* release the working buffer and install the best centers */
  vl_free (self->centers) ;
  self->centers = bestCenters ;
  return bestEnergy ;
}
void kmeans()
{
	const vl_size data_dim = 2;
	const vl_size num_data = 1000;

	float data[data_dim * num_data] = { 0.0f, };
	for (vl_size i = 0; i < data_dim * num_data; ++i)
		data[i] = (float)std::rand() / RAND_MAX;

	//
	std::cout << "start processing ..." << std::endl;

	const vl_size num_clusters = 3;
	const VlVectorComparisonType distance = VlDistanceL2;
	const vl_size num_max_iterations = 100;

	VlKMeans *kmeans = vl_kmeans_new(VL_TYPE_DOUBLE, distance);

	vl_kmeans_set_max_num_iterations(kmeans, num_max_iterations);

	if (true)
	{
		// initialization.
#if 1
		vl_kmeans_init_centers_with_rand_data(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#elif 0
		vl_kmeans_init_centers_plus_plus(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#else
		{
			float init_centers[data_dim * num_clusters] = { 0.0f, };
			for (vl_size i = 0; i < data_dim * num_clusters; ++i)
				init_centers[i] = (float)std::rand() / RAND_MAX;
			vl_kmeans_set_centers(kmeans, (void const *)init_centers, data_dim, num_clusters);
		}
#endif

		// clustering.
		const double energy = vl_kmeans_refine_centers(kmeans, data, num_data);
	}
	else
	{
		const VlKMeansAlgorithm algorithm = VlKMeansLloyd;  // VlKMeansLloyd, VlKMeansElkan, VlKMeansANN.
		const VlKMeansInitialization initialization = VlKMeansRandomSelection;  // VlKMeansRandomSelection, VlKMeansPlusPlus.
		const vl_size num_repetitions = 100;

		vl_kmeans_set_algorithm(kmeans, VlKMeansLloyd);
		vl_kmeans_set_initialization(kmeans, VlKMeansRandomSelection);
		vl_kmeans_set_num_repetitions(kmeans, num_repetitions);

		// clustering.
		const double energy = vl_kmeans_cluster(kmeans, (void const *)data, data_dim, num_data, num_clusters);
	}

	//
	const vl_size num_iterations = vl_kmeans_get_num_repetitions(kmeans);
	//const vl_type data_type = vl_kmeans_get_data_type(kmeans);

	//
	{
		const float *centers = (float *)vl_kmeans_get_centers(kmeans);
		for (int i = 0; i < num_clusters; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << centers[i * data_dim + j];
			}
			std::cout << ')' << std::endl;
		}
	}

	//
	{
		vl_uint32 assignments[num_data] = { 0, };
		double distances[num_data] = { 0, };
		vl_kmeans_quantize(kmeans, assignments, (void *)distances, (void const *)data, num_data);

		for (int i = 0; i < num_data; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << data[i * data_dim + j];  // TODO [check] >> is it correct?
			}
			std::cout << ") => " << assignments[i] << std::endl;
		}
	}

	std::cout << "end processing ..." << std::endl;

	//
	if (kmeans)
	{
		vl_kmeans_delete(kmeans);
		kmeans = NULL;
	}
}