Ejemplo n.º 1
0
/**
 * Run Shogun's k-means on `data` and hand the resulting cluster centers back
 * through `centers`.
 *
 * The number of clusters is taken from `mwordnum`; distances are Euclidean.
 *
 * @param data          feature matrix passed to
 *                      CDenseFeatures::set_feature_matrix()
 *                      (presumably one descriptor per column -- confirm with caller)
 * @param centers       [out] on success, the left-hand-side features of the
 *                      distance after training, which Shogun's k-means example
 *                      treats as the cluster centers. The caller receives one
 *                      reference and must SG_UNREF() it when done. Set to NULL
 *                      when training fails.
 * @param num_features  not needed by the clustering itself; kept so existing
 *                      callers continue to compile
 * @return the result of CKMeans::train()
 */
bool fImgSvm::kmeans(SGMatrix<float64_t> &data ,  CDenseFeatures<float64_t>*  &centers ,int32_t num_features)
{
    /* NOTE(review): Shogun is initialised and shut down on every call; that
     * is preserved from the original, but confirm it is intended. */
    init_shogun(&print_message);

    /* Only ever fed a throw-away label vector in the original code. */
    (void)num_features;

    const int32_t num_clusters = mwordnum;

    /* Wrap the raw matrix; SG_REF so this function holds its own reference
     * that is independent of the references taken by the distance below. */
    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>();
    features->set_feature_matrix(data);
    SG_REF(features);

    /* The distance is handed to the clustering machine, which releases it
     * when the machine itself is released. */
    CEuclideanDistance* distance = new CEuclideanDistance(features, features);
    CKMeans* clustering = new CKMeans(num_clusters, distance);

    const bool trained = clustering->train(features);

    if (trained)
    {
        /* get_lhs() returns an additional reference. It is deliberately NOT
         * released here: it is the reference handed to the caller. Releasing
         * it (as the original did) left `centers` dangling once `clustering`
         * and its distance were destroyed below. */
        centers = static_cast<CDenseFeatures<float64_t>*>(distance->get_lhs());
    }
    else
    {
        centers = NULL;
    }

    SG_UNREF(clustering);
    SG_UNREF(features);

    exit_shogun();

    /* The original fell off the end of a bool function (undefined behaviour). */
    return trained;
}
 /**
  * Compute one center per cluster by grouping the datapoints according to
  * their labels and applying `average_fn` to each group.
  *
  * @param data            datapoints to group
  * @param cluster_labels  cluster index of each datapoint; must have the same
  *                        size as `data`, and every entry must be
  *                        < `num_clusters`
  * @param average_fn      reduces the datapoints of one cluster to its center;
  *                        it is also invoked (with an empty vector) for
  *                        clusters that received no datapoints
  * @param num_clusters    number of clusters, i.e. size of the returned vector
  * @return vector of `num_clusters` centers, indexed by cluster label
  */
 static std::vector<Datatype, Allocator> ComputeClusterCenters(const std::vector<Datatype, Allocator>& data, const std::vector<u_int32_t>& cluster_labels, std::function<Datatype(const std::vector<Datatype, Allocator>&)>& average_fn, const u_int32_t num_clusters)
 {
     assert(data.size() == cluster_labels.size());
     // Separate the datapoints into their clusters
     std::vector<std::vector<Datatype, Allocator>> clustered_data(num_clusters);
     for (size_t idx = 0; idx < data.size(); idx++)
     {
         const u_int32_t label = cluster_labels[idx];
         // An out-of-range label would index past the end of clustered_data;
         // treat it as a precondition violation, like the size check above.
         assert(label < num_clusters);
         clustered_data[label].push_back(data[idx]);
     }
     // Compute the center of each cluster
     std::vector<Datatype, Allocator> cluster_centers(num_clusters);
     for (u_int32_t cluster = 0; cluster < num_clusters; cluster++)
     {
         cluster_centers[cluster] = average_fn(clustered_data[cluster]);
     }
     return cluster_centers;
 }