Пример #1
0
/// Builds the k-means centers of this code book from a block of samples.
///
/// @param data      Sample buffer handed to VLFeat; must not be null.
/// @param num_data  Number of samples in @p data; must be at least @p K.
/// @param dim       Dimension passed to VLFeat for each sample.
/// @param K         Number of centers to generate.
///
/// On invalid arguments a message is written to stderr and the process
/// terminates with exit code -1.
void CodeBook::GenKMeans(const float* data, const uint32_t num_data,
                         const uint32_t dim, const uint32_t K)
{
    // Validate the arguments in order: missing buffer first, then the
    // sample count versus the requested number of centers.
    const char* failure = nullptr;
    if (!data)
    {
        failure = "NULL pointer for data\n";
    }
    else if (K > num_data)
    {
        failure = "number of data must be equal or greater than centers\n";
    }

    if (failure)
    {
        fprintf(stderr, "%s", failure);
        exit(-1);
    }

    // Run SetUp() once, the first time the model is needed.
    if (!has_setup_)
    {
        SetUp();
    }

    // Seed the centers from randomly picked samples, then refine them.
    vl_kmeans_init_centers_with_rand_data(kmeans_model_, data, dim, num_data, K);
    vl_kmeans_refine_centers(kmeans_model_, data, num_data);
}
void kmeans()
{
	const vl_size data_dim = 2;
	const vl_size num_data = 1000;

	float data[data_dim * num_data] = { 0.0f, };
	for (vl_size i = 0; i < data_dim * num_data; ++i)
		data[i] = (float)std::rand() / RAND_MAX;

	//
	std::cout << "start processing ..." << std::endl;

	const vl_size num_clusters = 3;
	const VlVectorComparisonType distance = VlDistanceL2;
	const vl_size num_max_iterations = 100;

	VlKMeans *kmeans = vl_kmeans_new(VL_TYPE_DOUBLE, distance);

	vl_kmeans_set_max_num_iterations(kmeans, num_max_iterations);

	if (true)
	{
		// initialization.
#if 1
		vl_kmeans_init_centers_with_rand_data(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#elif 0
		vl_kmeans_init_centers_plus_plus(kmeans, (void const *)data, data_dim, num_data, num_clusters);
#else
		{
			float init_centers[data_dim * num_clusters] = { 0.0f, };
			for (vl_size i = 0; i < data_dim * num_clusters; ++i)
				init_centers[i] = (float)std::rand() / RAND_MAX;
			vl_kmeans_set_centers(kmeans, (void const *)init_centers, data_dim, num_clusters);
		}
#endif

		// clustering.
		const double energy = vl_kmeans_refine_centers(kmeans, data, num_data);
	}
	else
	{
		const VlKMeansAlgorithm algorithm = VlKMeansLloyd;  // VlKMeansLloyd, VlKMeansElkan, VlKMeansANN.
		const VlKMeansInitialization initialization = VlKMeansRandomSelection;  // VlKMeansRandomSelection, VlKMeansPlusPlus.
		const vl_size num_repetitions = 100;

		vl_kmeans_set_algorithm(kmeans, VlKMeansLloyd);
		vl_kmeans_set_initialization(kmeans, VlKMeansRandomSelection);
		vl_kmeans_set_num_repetitions(kmeans, num_repetitions);

		// clustering.
		const double energy = vl_kmeans_cluster(kmeans, (void const *)data, data_dim, num_data, num_clusters);
	}

	//
	const vl_size num_iterations = vl_kmeans_get_num_repetitions(kmeans);
	//const vl_type data_type = vl_kmeans_get_data_type(kmeans);

	//
	{
		const float *centers = (float *)vl_kmeans_get_centers(kmeans);
		for (int i = 0; i < num_clusters; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << centers[i * data_dim + j];
			}
			std::cout << ')' << std::endl;
		}
	}

	//
	{
		vl_uint32 assignments[num_data] = { 0, };
		double distances[num_data] = { 0, };
		vl_kmeans_quantize(kmeans, assignments, (void *)distances, (void const *)data, num_data);

		for (int i = 0; i < num_data; ++i)
		{
			std::cout << '(';
			for (int j = 0; j < data_dim; ++j)
			{
				if (j) std::cout << ',';
				std::cout << data[i * data_dim + j];  // TODO [check] >> is it correct?
			}
			std::cout << ") => " << assignments[i] << std::endl;
		}
	}

	std::cout << "end processing ..." << std::endl;

	//
	if (kmeans)
	{
		vl_kmeans_delete(kmeans);
		kmeans = NULL;
	}
}