Example no. 1
// Build the clustering: run k-means for every candidate cluster count up to
// _max and keep the result with the best quality score.
util::Clustering::Clustering(const DataVector& _data, unsigned _max) : m_maxClusters(_max) {
	// Copy the input samples into the member data vector.
	m_data.insert(m_data.begin(), _data.begin(), _data.end());
	
	// Run k-means once for every candidate cluster count and record the
	// resulting clustering together with its quality score.
	ClusterMap clusterQualityScores;
	
	for (unsigned clusterCount = 1; clusterCount <= m_maxClusters; clusterCount++) {
		clusterQualityScores[clusterCount] = _kmeans(clusterCount);
	}
		
	// Sort the (cluster count, clustering) pairs so that the highest-scoring
	// clustering comes first.
	std::vector<CountClusterPair> sortedScores;
	std::copy(clusterQualityScores.begin(), clusterQualityScores.end(), std::back_inserter(sortedScores));
	ScoreComparator comparator;
	std::sort(sortedScores.begin(), sortedScores.end(), comparator);

	report("Scores:");
	for (std::size_t i = 0; i < sortedScores.size(); i++) {
		report(sortedScores[i].first << " clusters: " << sortedScores[i].second.getScore());
	}
	
	report("Clustering with highest score: ");
	report(util::Indents(2) << "cluster count: " << sortedScores[0].first);
	report(util::Indents(2) << "aggregate score: " << sortedScores[0].second.getScore());
	report(util::Indents(2) << "detected clusters:");
	for (const Cluster& cluster : sortedScores[0].second.getClusters()) {
		report(util::Indents(4) << "position = " << cluster.position << ", elements = " << cluster.data.size());
	}
	
	// Keep the highest-scoring clustering as the final result.
	result = sortedScores[0].second;
}
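
The snippet assumes a ScoreComparator functor that orders the pairs by descending score, so that sortedScores[0] holds the best clustering. A minimal sketch of such a comparator, assuming CountClusterPair is a pair whose second element exposes getScore() (the names come from the snippet, the body below is hypothetical):

struct ScoreComparator {
	// Order CountClusterPair entries so the highest aggregate score comes first.
	bool operator()(const CountClusterPair& a, const CountClusterPair& b) const {
		return a.second.getScore() > b.second.getScore();
	}
};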
Example no. 2
// Iteratively reassign randomly chosen vertices to neighbouring clusters,
// sampling the new cluster with a temperature-controlled soft-max over edge weights.
void CodeAtlas::RandomWalkClusterer::randomWalk()
{
	// Seed the generator so that runs with the same seed are reproducible.
	srand(m_seed);
	int nVtx = m_vtx.size();

	typedef QHash<int, double> ClusterMap;
	typedef QHash<int, float> EdgeMap;
	// ithIter is only advanced at the end of the loop body, so iterations that
	// land on an isolated vertex (no weighted neighbours) are not counted.
	for (int ithIter = 0; ithIter < m_maxIter;)
	{
		// Choose a random vertex whose cluster assignment may change.
		int curVtxID = rand() % nVtx;
		Vertex& curVtx = m_vtx[curVtxID];

		// Accumulate, per neighbouring cluster, the total edge weight leaving the current vertex.
		ClusterMap nearClusterWeight;
		for (EdgeMap::Iterator pE = curVtx.m_outEdge.begin(); pE != curVtx.m_outEdge.end(); ++pE)
		{
			int tarID = pE.key();
			double w = pE.value();
			if (w == 0)
				continue;
			Vertex& nearVtx = m_vtx[tarID];
			// QHash::operator[] value-initializes a missing entry to 0.0,
			// so the weight can be accumulated directly.
			nearClusterWeight[nearVtx.m_clusterID] += w;
		}
		// An isolated vertex cannot be reassigned; pick another one.
		if (nearClusterWeight.isEmpty())
			continue;

		// Turn the accumulated weights into a soft-max (Boltzmann) distribution with
		// temperature m_t; subtracting the maximum weight before exp() keeps the
		// computation numerically stable.
		double maxWeight = 0;
		for (ClusterMap::Iterator pC = nearClusterWeight.begin(); pC != nearClusterWeight.end(); ++pC)
		{
			maxWeight = max(maxWeight, pC.value());
		}
		double weightSum = 0;
		double t = m_t;
		for (ClusterMap::Iterator pC = nearClusterWeight.begin(); pC != nearClusterWeight.end(); ++pC)
		{
			double w = exp((pC.value() - maxWeight) / t);
			pC.value() = w;
			weightSum += w;
		}
		// Normalize so the weights sum to one.
		for (ClusterMap::Iterator pC = nearClusterWeight.begin(); pC != nearClusterWeight.end(); ++pC)
		{
			pC.value() /= weightSum;
		}

		// Sample the new cluster from the distribution (roulette-wheel selection).
		double prob = randFloat();
		double accProb = 0;
		int newCluster = -1;
		for (ClusterMap::Iterator pC = nearClusterWeight.begin(); pC != nearClusterWeight.end(); ++pC)
		{
			accProb += pC.value();
			if (accProb > prob)
			{
				newCluster = pC.key();
				break;
			}
		}
		// Guard against floating-point round-off: fall back to the last cluster.
		if (newCluster == -1)
		{
			newCluster = (nearClusterWeight.end() - 1).key();
		}

		//printf("old: %d new: %d\n\n", curVtx.m_clusterID, newCluster);
		// update graph
		curVtx.m_clusterID = newCluster;
		++ithIter;
	}
}
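
The walk relies on a few members that are not shown in the snippet. A minimal sketch of what they might look like, assuming randFloat() returns a uniform value in [0, 1) and each Vertex stores its current cluster label and weighted out-edges (these declarations are assumptions for illustration, not the original CodeAtlas headers):

// Hypothetical declarations assumed by randomWalk().
#include <QHash>
#include <QVector>
#include <cstdlib>

namespace CodeAtlas {

class RandomWalkClusterer
{
public:
	struct Vertex
	{
		int               m_clusterID;  // current cluster label
		QHash<int, float> m_outEdge;    // target vertex id -> edge weight
	};

	void randomWalk();                  // the routine shown above

private:
	// Uniform random value in [0, 1); used for the roulette-wheel draw.
	static double randFloat() { return rand() / (double(RAND_MAX) + 1.0); }

	QVector<Vertex> m_vtx;      // graph vertices
	unsigned        m_seed;     // seed passed to srand()
	int             m_maxIter;  // number of successful reassignments to perform
	double          m_t;        // soft-max temperature
};

} // namespace CodeAtlas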