void CodeBook::GenKMeans(const float* data, const uint32_t num_data, const uint32_t dim, const uint32_t K) { if (data == NULL) { fprintf(stderr, "NULL pointer for data\n"); exit(-1); } if (num_data < K) { fprintf(stderr, "number of data must be equal or greater than centers\n"); exit(-1); } if (!has_setup_) SetUp(); // initialize centers vl_kmeans_init_centers_with_rand_data(kmeans_model_, data, dim, num_data, K); vl_kmeans_refine_centers(kmeans_model_, data, num_data); }
/** Cluster data from scratch, keeping the best of several restarts.
 **
 ** For each of self->numRepetitions repetitions the centers are
 ** initialised according to self->initialization (random data points or
 ** k-means++), refined with vl_kmeans_refine_centers(), and the resulting
 ** energy is compared with the best one seen so far. The centers of the
 ** lowest-energy repetition are left in self->centers.
 **
 ** @param self        K-means object; its centers are overwritten.
 ** @param data        data to cluster, forwarded to the seeding/refinement routines.
 ** @param dimension   dimension of a data point.
 ** @param numData     number of data points.
 ** @param numCenters  number of centers to compute.
 ** @return energy of the best repetition; ::VL_INFINITY_D if no repetition
 **         produced an energy below infinity.
 **
 ** Aborts the process if self->initialization is not one of the handled
 ** values.
 **
 ** NOTE(review): when no repetition improves on the initial infinite
 ** energy (e.g. zero repetitions), bestCenters stays NULL and
 ** self->centers is NULL on return -- confirm callers tolerate that.
 **/
VL_EXPORT double
vl_kmeans_cluster (VlKMeans * self,
                   void const * data,
                   vl_size dimension,
                   vl_size numData,
                   vl_size numCenters)
{
  vl_uindex repetition ;
  double bestEnergy = VL_INFINITY_D ;
  void * bestCenters = NULL ;

  for (repetition = 0 ; repetition < self->numRepetitions ; ++ repetition) {
    double energy ;
    double timeRef ;

    if (self->verbosity) {
      /* The counters are not int-sized on every platform, so convert them
         explicitly to a type that matches the conversion specifier. */
      VL_PRINTF("kmeans: repetition %lu of %lu\n",
                (unsigned long) (repetition + 1),
                (unsigned long) self->numRepetitions) ;
    }

    /* initialise the centers for this repetition */
    timeRef = vl_get_cpu_time() ;
    switch (self->initialization) {
      case VlKMeansRandomSelection :
        vl_kmeans_seed_centers_with_rand_data (self,
                                               data, dimension, numData,
                                               numCenters) ;
        break ;
      case VlKMeansPlusPlus :
        vl_kmeans_seed_centers_plus_plus (self,
                                          data, dimension, numData,
                                          numCenters) ;
        break ;
      default:
        abort() ;
    }

    if (self->verbosity) {
      VL_PRINTF("kmeans: K-means initialized in %.2f s\n",
                vl_get_cpu_time() - timeRef) ;
    }

    /* refine the centers and measure the resulting energy */
    timeRef = vl_get_cpu_time () ;
    energy = vl_kmeans_refine_centers (self, data, numData) ;

    if (self->verbosity) {
      VL_PRINTF("kmeans: K-means terminated in %.2f s with energy %g\n",
                vl_get_cpu_time() - timeRef, energy) ;
    }

    /* copy centers to output if current solution is optimal */
    if (energy < bestEnergy) {
      void * temp ;
      bestEnergy = energy ;

      /* the spare buffer is allocated lazily, the first time it is needed */
      if (bestCenters == NULL) {
        bestCenters = vl_malloc(vl_get_type_size(self->dataType) *
                                self->dimension *
                                self->numCenters) ;
      }

      /* swap buffers: the best centers are kept aside and the object is
         left with the spare buffer */
      temp = bestCenters ;
      bestCenters = self->centers ;
      self->centers = temp ;
    } /* better energy */
  } /* next repetition */

  /* discard the working buffer and install the best centers found */
  vl_free (self->centers) ;
  self->centers = bestCenters ;
  return bestEnergy ;
}
void kmeans() { const vl_size data_dim = 2; const vl_size num_data = 1000; float data[data_dim * num_data] = { 0.0f, }; for (vl_size i = 0; i < data_dim * num_data; ++i) data[i] = (float)std::rand() / RAND_MAX; // std::cout << "start processing ..." << std::endl; const vl_size num_clusters = 3; const VlVectorComparisonType distance = VlDistanceL2; const vl_size num_max_iterations = 100; VlKMeans *kmeans = vl_kmeans_new(VL_TYPE_DOUBLE, distance); vl_kmeans_set_max_num_iterations(kmeans, num_max_iterations); if (true) { // initialization. #if 1 vl_kmeans_init_centers_with_rand_data(kmeans, (void const *)data, data_dim, num_data, num_clusters); #elif 0 vl_kmeans_init_centers_plus_plus(kmeans, (void const *)data, data_dim, num_data, num_clusters); #else { float init_centers[data_dim * num_clusters] = { 0.0f, }; for (vl_size i = 0; i < data_dim * num_clusters; ++i) init_centers[i] = (float)std::rand() / RAND_MAX; vl_kmeans_set_centers(kmeans, (void const *)init_centers, data_dim, num_clusters); } #endif // clustering. const double energy = vl_kmeans_refine_centers(kmeans, data, num_data); } else { const VlKMeansAlgorithm algorithm = VlKMeansLloyd; // VlKMeansLloyd, VlKMeansElkan, VlKMeansANN. const VlKMeansInitialization initialization = VlKMeansRandomSelection; // VlKMeansRandomSelection, VlKMeansPlusPlus. const vl_size num_repetitions = 100; vl_kmeans_set_algorithm(kmeans, VlKMeansLloyd); vl_kmeans_set_initialization(kmeans, VlKMeansRandomSelection); vl_kmeans_set_num_repetitions(kmeans, num_repetitions); // clustering. 
const double energy = vl_kmeans_cluster(kmeans, (void const *)data, data_dim, num_data, num_clusters); } // const vl_size num_iterations = vl_kmeans_get_num_repetitions(kmeans); //const vl_type data_type = vl_kmeans_get_data_type(kmeans); // { const float *centers = (float *)vl_kmeans_get_centers(kmeans); for (int i = 0; i < num_clusters; ++i) { std::cout << '('; for (int j = 0; j < data_dim; ++j) { if (j) std::cout << ','; std::cout << centers[i * data_dim + j]; } std::cout << ')' << std::endl; } } // { vl_uint32 assignments[num_data] = { 0, }; double distances[num_data] = { 0, }; vl_kmeans_quantize(kmeans, assignments, (void *)distances, (void const *)data, num_data); for (int i = 0; i < num_data; ++i) { std::cout << '('; for (int j = 0; j < data_dim; ++j) { if (j) std::cout << ','; std::cout << data[i * data_dim + j]; // TODO [check] >> is it correct? } std::cout << ") => " << assignments[i] << std::endl; } } std::cout << "end processing ..." << std::endl; // if (kmeans) { vl_kmeans_delete(kmeans); kmeans = NULL; } }