Пример #1
0
/**
 * Clusters `_data` by trying every cluster count from 1 to `_max` and keeping
 * the candidate that ends up first after sorting with ScoreComparator.
 *
 * Each cluster count is evaluated with _kmeans(); all candidates are reported,
 * and the front of the sorted list is stored in `result`. The report text calls
 * that entry the "highest score", so ScoreComparator presumably orders best-first
 * (confirm against its definition).
 *
 * When `_max` is 0 no candidate is produced; in that case `result` is not
 * assigned and the constructor returns after reporting the (empty) score list.
 *
 * @param _data  values to cluster; copied into m_data
 * @param _max   largest cluster count to try; stored in m_maxClusters
 */
util::Clustering::Clustering(const DataVector& _data, unsigned _max) : m_maxClusters(_max) {
	m_data.insert(m_data.begin(), _data.begin(), _data.end());

	// Evaluate one clustering per candidate cluster count.
	ClusterMap clusterQualityScores;
	for (unsigned clusterCount = 1; clusterCount <= m_maxClusters; clusterCount++) {
		clusterQualityScores[clusterCount] = _kmeans(clusterCount);
	}

	// Flatten the map into a vector so the candidates can be ordered by score.
	std::vector< CountClusterPair > sortedScores;
	std::copy(clusterQualityScores.begin(), clusterQualityScores.end(), std::back_inserter(sortedScores));
	ScoreComparator comparator;
	std::sort(sortedScores.begin(), sortedScores.end(), comparator);

	report("Scores:");
	for (auto& entry : sortedScores) {
		report(entry.first << " clusters: " << entry.second.getScore());
	}

	// With _max == 0 there is nothing to choose from; indexing element 0 of an
	// empty vector would be undefined behaviour.
	if (sortedScores.empty()) {
		report("No clusterings were evaluated.");
		return;
	}

	CountClusterPair& best = sortedScores.front();

	report("Clustering with highest score: ");
	report(util::Indents(2) << "cluster count: " << best.first);
	report(util::Indents(2) << "aggregate score: " << best.second.getScore());
	report(util::Indents(2) << "detected clusters:");

	// Bind the cluster list once: if getClusters() returns by value, begin() and
	// end() taken from two separate calls would refer to different temporaries.
	auto&& clusters = best.second.getClusters();
	for (const Cluster& cluster : clusters) {
		report(util::Indents(4) << "position = " << cluster.position << ", elements = " << cluster.data.size());
	}

	result = best.second;
}
Пример #2
0
/*
 * Runs _kmeans() opt.trials times and keeps the outcome of the best trial.
 *
 * Every trial writes into scratch buffers; whenever a trial's inertia is
 * strictly lower than *inertia, that inertia, the trial's centroids and its
 * membership array are copied into the caller's outputs.
 *
 * data, rank, size, ppp, t and opt are forwarded unchanged to _kmeans().
 *
 * centroids   out: receives opt.n_centroids * opt.dimensions doubles, copied
 *             with a single memcpy starting at centroids[0] (this assumes the
 *             rows are stored contiguously -- presumably how alloc2d lays them
 *             out; confirm).
 * membership  out: receives opt.local_rows ints.
 * inertia     in/out: read before it is ever written here, so the caller must
 *             initialise it; it is only overwritten by a lower trial inertia.
 *
 * Returns the sum of the values returned by _kmeans() over all trials.
 */
int kmeans(double **data, double **centroids, int *membership,
           double *inertia, int rank, int size, int *ppp, mytimer *t, options opt) {
  int trial;
  int total_iterations = 0;

  /* Scratch outputs reused by every trial. */
  double **temp_centroids = (double**) alloc2d(opt.n_centroids, opt.dimensions);
  int *temp_membership = (int*) calloc(opt.local_rows, sizeof(int));
  check(temp_membership);
  double temp_inertia = DBL_MAX;

  /* Byte counts of the two result buffers; opt is a by-value copy, so these stay fixed. */
  const size_t centroid_bytes = opt.n_centroids * opt.dimensions * sizeof(double);
  const size_t membership_bytes = opt.local_rows * sizeof(int);

  for (trial = 0; trial < opt.trials; trial++) {
    // MPI_Barrier(MPI_COMM_WORLD);
    if (opt.verbose > 1 && rank == 0) {
      printf("\nTRIAL %d\n", trial + 1);
    }

    total_iterations += _kmeans(data, temp_centroids, temp_membership, &temp_inertia,
                                rank, size, ppp, t, opt);

    /* Not an improvement over the best result so far: leave the outputs alone. */
    if (!(temp_inertia < *inertia)) {
      continue;
    }

    *inertia = temp_inertia;
    memcpy(*centroids, *temp_centroids, centroid_bytes);
    memcpy(membership, temp_membership, membership_bytes);
  }

  /* Release the scratch buffers: the data block first, then the row-pointer array. */
  free(*temp_centroids);
  free(temp_centroids);
  free(temp_membership);

  return total_iterations;
}