bool fImgSvm::kmeans(SGMatrix<float64_t> &data , CDenseFeatures<float64_t>* ¢ers ,int32_t num_features) { init_shogun(&print_message); int32_t num_clusters= mwordnum ; int32_t dim_features=SIFTN; float64_t cluster_std_dev=2.0; /* build random cluster centers */ SGMatrix<float64_t> cluster_centers(dim_features, num_clusters); SGVector<float64_t>::random_vector(cluster_centers.matrix, dim_features*num_clusters, 0, 20.0); //SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows, // cluster_centers.num_cols, "cluster centers"); /* create features, SG_REF to avoid deletion */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(data); SG_REF(features); /* create labels for cluster centers */ CMulticlassLabels* labels=new CMulticlassLabels(num_features); for (index_t i=0; i<num_features; ++i) labels->set_label(i, 0); /* create distance */ CEuclideanDistance* distance=new CEuclideanDistance(features, features); /* create distance machine */ CKMeans* clustering=new CKMeans(num_clusters, distance); clustering->train(features); /* build clusters */ // CMulticlassLabels* result=CMulticlassLabels::obtain_from_generic(clustering->apply()); // for (index_t i=0; i<result->get_num_labels(); ++i) // SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i)); /* print cluster centers */ centers = (CDenseFeatures<float64_t>*)distance->get_lhs(); SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix(); //SG_UNREF(result); SG_UNREF(centers); SG_UNREF(clustering); SG_UNREF(labels); SG_UNREF(features); exit_shogun(); }
/**
 * Compute one center per cluster by averaging the datapoints assigned to it.
 *
 * @param data            datapoints; data[i] belongs to cluster cluster_labels[i].
 * @param cluster_labels  cluster index of every datapoint. Must have the same
 *                        size as `data`, and every entry must be smaller than
 *                        `num_clusters` (both checked with assert).
 * @param average_fn      called once per cluster with that cluster's
 *                        datapoints, in their original relative order; its
 *                        result becomes the cluster's center. It is also
 *                        called for clusters that received no datapoints, in
 *                        which case it gets an empty vector.
 * @param num_clusters    number of clusters, i.e. the size of the result.
 * @return vector of `num_clusters` centers, indexed by cluster label.
 */
static std::vector<Datatype, Allocator> ComputeClusterCenters(const std::vector<Datatype, Allocator>& data, const std::vector<u_int32_t>& cluster_labels, std::function<Datatype(const std::vector<Datatype, Allocator>&)>& average_fn, const u_int32_t num_clusters)
{
    assert(data.size() == cluster_labels.size());

    // First pass: validate the labels and count the members of each cluster so
    // the per-cluster buckets can be allocated once instead of grown piecemeal.
    std::vector<size_t> cluster_sizes(num_clusters, 0);
    for (size_t idx = 0; idx < cluster_labels.size(); idx++)
    {
        const u_int32_t label = cluster_labels[idx];
        // An out-of-range label would index past the end of the buckets below.
        assert(label < num_clusters);
        cluster_sizes[label]++;
    }

    // Second pass: separate the datapoints into their clusters.
    std::vector<std::vector<Datatype, Allocator>> clustered_data(num_clusters);
    for (u_int32_t cluster = 0; cluster < num_clusters; cluster++)
    {
        clustered_data[cluster].reserve(cluster_sizes[cluster]);
    }
    for (size_t idx = 0; idx < data.size(); idx++)
    {
        clustered_data[cluster_labels[idx]].push_back(data[idx]);
    }

    // Compute the center of each cluster.
    std::vector<Datatype, Allocator> cluster_centers(num_clusters);
    for (u_int32_t cluster = 0; cluster < num_clusters; cluster++)
    {
        cluster_centers[cluster] = average_fn(clustered_data[cluster]);
    }
    return cluster_centers;
}