コード例 #1
0
/*
 * Cluster all the samples again.
 */
bool OfflineKMeans::reCluster() {
	bool clusterChanged = false;

    for(int i = 0; i < sampleNum; ++i) {
		const float* sample = samples.getSample(i);

		int oldCluster = clusterIndexes[i];
		int newCluster = cluster(sample, featureNum);
		if(newCluster != oldCluster) {
			clusterChanged = true;

#ifdef OKM_DBG
			std::cout<<"i = "<<i<<", oldCluster = "<<oldCluster<<", newCluster = "
				<<newCluster<<std::endl;
#endif
			// move sample from the old cluster to the new cluster.
			clusters[oldCluster].second.erase(const_cast<float*>(sample));
			clusters[newCluster].second.insert(const_cast<float*>(sample));

			/*
			 * set clusterIndexes[i] to newCluster to indicate now sample 
			 * indexed by 'i' is in cluster indexed by 'newCluster'.
			 */
			clusterIndexes[i] = newCluster;
		}
	}
	if(clusterChanged == true)
		updateCentroids();

	return clusterChanged;
}
コード例 #2
0
ファイル: Clustering.hpp プロジェクト: 10341074/pacs
void Clustering<N, ObjectT, DistanceP>::apply( objectList_T const & objects )
{
    // iteration loop truncated at maxIt
    for( int it = 0; it < M_maxIt; it++ )
    {
        if( M_verbose )
            std::cout << "=======================" << std::endl
                      << "iteration " << it << std::endl;

        // clean up object.list
        for( size_t d = 0; d < N; d++ )
            M_objectList[d].clear();

        // loop on the object set
        for( size_t kObj = 0; kObj < objects.size(); kObj++ )
        {
            // compute the distance of the k-th object from all the centroids
            std::array<real,N> distances;
            for( size_t d = 0; d < N; d++ )
            {
                distances[d] = M_distancePolicy( objects[kObj],
                                                 M_centroids[d] );
            }

            // find the minimum distance
            int minPos = std::min_element( distances.begin(), distances.end() )
                    - distances.begin();

            // insert the object in the list of the closest centroid
            M_objectList[minPos].push_back( objects[kObj] );
        }

        if( M_verbose )
        {
            for( size_t d = 0; d < N; d++ )
            {
                std::cout << "centroid " << d << " has "
                          << M_objectList[d].size() << " objects" << std::endl;
                for( size_t j = 0; j < M_objectList[d].size(); j++ )
                    std::cout << M_objectList[d][j] << std::endl;
            }
        }

        bool const isIncremented = updateCentroids();

        printGnuplot( it );

        if ( !isIncremented )
        {
            if ( M_verbose )
                std::cout << "iteration interrupted at " << it << std::endl;
            break;
        }
    }

    computeQuality();
}
コード例 #3
0
void OfflineKMeans::initClusters() {
	// calculate the minimum and maximum values of each feature
	std::pair<float, float>* featureRanges = new std::pair<float, float>[featureNum];
	for(int j = 0; j < featureNum; ++j) {
		float minFeatureJ = std::numeric_limits<float>::max();
		float maxFeatureJ = std::numeric_limits<float>::min();
		for(int i = 0; i < sampleNum; ++i) {
			float feature = samples.getSample(i)[j];
			if(minFeatureJ > feature)
				minFeatureJ = feature;
			if(maxFeatureJ < feature)
				maxFeatureJ = feature;
		}

		featureRanges[j].first = minFeatureJ;
		featureRanges[j].second = maxFeatureJ;
	}

	// initialize centroids of clusters.
	clusters = new std::pair<float*, std::set<float*> >[clusterNum];
	for(int i = 0; i < clusterNum; ++i) {
		float* centroid = new float[featureNum];
		srand(i+1);
		float scale = static_cast<float>(rand()) / RAND_MAX;
		for(int j = 0; j < featureNum; ++j) {
			float minFeatureJ = featureRanges[j].first;
			float maxFeatureJ = featureRanges[j].second;
			centroid[j] = minFeatureJ + (maxFeatureJ - minFeatureJ) * scale;
		}
		clusters[i].first = centroid;
	}
	delete[] featureRanges;
#ifdef OKM_DBG
	std::cout<<"Centroids of clusters initialized randomly:"<<std::endl;
	for(int i = 0; i < clusterNum; ++i) {
		std::cout<<"<";
		float* centroid = clusters[i].first;
		int j;
		for(j = 0; j < featureNum-1; ++j)
			std::cout<<centroid[j]<<" ";
		std::cout<<centroid[j]<<">"<<std::endl;
	}
#endif

	/* cluster all samples for the first time.
	 * setup mapping from samples to clusters and centroids of clusters.
	 */
	clusterIndexes = new int[sampleNum];
	for(int i = 0; i < sampleNum; ++i) {
		const float* sample = samples.getSample(i);

		int targetCluster = cluster(sample, featureNum);
		clusterIndexes[i] = targetCluster;
		clusters[targetCluster].second.insert(const_cast<float*>(sample));
	}
	updateCentroids();
}