Esempio n. 1
0
/**
 * Agglomeratively clusters the line_count rows of PSMatrix and reports the
 * three largest clusters.
 *
 * Starting from one cluster per row, the pair of live clusters with the
 * smallest jaccardDist() is merged repeatedly until no pair is closer than 1.
 * The PSMatrix row of a merged cluster becomes the operator& of the two merged
 * rows (presumably the intersection of their preference sets -- confirm
 * against the PSMatrix element type), so PSMatrix is modified by this call.
 *
 * @param result  Overwritten with exactly three rows of line_count ints.
 *                result[k][j] is 1 when row j belongs to the k-th largest
 *                cluster and 0 otherwise; ties in size go to the lower
 *                cluster index. When fewer than three clusters exist
 *                (including line_count == 0), the unused rows stay all-zero.
 */
void JLinkage::clusterPSMatrix(std::vector<std::vector<int> >& result)
{
	/* allocate memory */
	std::vector<std::vector<double> > distances(line_count,
			std::vector<double>(line_count, 0));
	/* clusters[c][j] == 1 <=> row j currently belongs to cluster c */
	std::vector<std::vector<int> > clusters(line_count,
			std::vector<int>(line_count, 0));
	/* indicators[c] == 1 <=> cluster c is still alive (not merged away) */
	std::vector<int> indicators(line_count, 1);

	/* initialization: singleton clusters and all pairwise distances */
	double minDis = 1;
	int indexA = 0, indexB = 0;
	for(int i = 0; i < line_count; i++)
	{
		clusters[i][i] = 1;
		for(int j = i + 1; j < line_count; j++)
		{
			const double jd = jaccardDist(PSMatrix[i], PSMatrix[j]);
			distances[i][j] = jd;
			distances[j][i] = jd;
			if(jd < minDis)
			{
				minDis = jd;
				indexA = i;
				indexB = j;
			}
		}
	}

	/* minDis is either the literal 1 or a value strictly below it, so "< 1"
	 * is equivalent to the exact comparison while avoiding float equality. */
	while(minDis < 1)
	{
		/* merge cluster indexB into cluster indexA */
		for(int i = 0; i < line_count; i++)
		{
			if(clusters[indexA][i] || clusters[indexB][i])
				clusters[indexA][i] = clusters[indexB][i] = 1;
		}
		indicators[indexB] = 0;

		/* Whole-row assignment: doing it once is enough, repeating it is a no-op. */
		PSMatrix[indexA] = PSMatrix[indexB] = PSMatrix[indexA]&PSMatrix[indexB];

		/* recalculate distances from the merged cluster to every row */
		for(int i = 0; i < line_count; i++)
		{
			distances[indexA][i] = jaccardDist(PSMatrix[indexA], PSMatrix[i]);
			distances[i][indexA] = distances[indexA][i];
		}

		/* find the closest pair among the clusters that are still alive */
		minDis = 1;
		for(int i = 0; i < line_count; i++)
		{
			if(indicators[i] == 0) continue;
			for(int j = i + 1; j < line_count; j++)
			{
				if(indicators[j] == 0) continue;
				if(distances[i][j] < minDis)
				{
					minDis = distances[i][j];
					indexA = i;
					indexB = j;
				}
			}
		}
	}

	/* calculate cluster size; merged-away clusters count as size 0 */
	std::vector<int> clusterSizes(line_count);
	for(int i = 0; i < line_count; i++)
	{
		int cs = 0;
		if(indicators[i])
		{
			for(int j = 0; j < line_count; j++)
			{
				if(clusters[i][j]) ++cs;
			}
		}
		clusterSizes[i] = cs;
	}

	/* choose the largest three clusters */
	const int cluster_num = 3;
	result.clear();
	result.resize(cluster_num, std::vector<int>(line_count));

	for(int count = 0; count < cluster_num; count++)
	{
		/* Start from "nothing found" so that neither an empty input nor an
		 * exhausted cluster list ever indexes clusterSizes/clusters. */
		int max_index = -1;
		int max_size = 0;
		for(int i = 0; i < line_count; i++)
		{
			if(max_size < clusterSizes[i])
			{
				max_size = clusterSizes[i];
				max_index = i;
			}
		}

		/* No non-empty cluster left: keep the remaining rows all-zero rather
		 * than reporting a stale copy of cluster 0. */
		if(max_index < 0) break;

		result[count] = clusters[max_index];
		clusterSizes[max_index] = 0; /* exclude from the next round */
	}
}
void NetworKit::IntrapartitionDensity::run() {
	hasRun = false;

	Aux::SignalHandler handler;

	minimumValue = std::numeric_limits< double >::max();
	maximumValue  = std::numeric_limits< double >::lowest();
	unweightedAverage = 0;
	weightedAverage = 0;
	values.clear();

	std::vector<count> clusterSizes(P.upperBound(), 0);
	std::vector<count> intraEdges(P.upperBound(), 0);

	handler.assureRunning();

	G.forEdges([&](node u, node v) {
		if (P[u] == P[v]) {
			++intraEdges[P[u]];
		}
	});

	handler.assureRunning();

	G.forNodes([&](node u) {
		++clusterSizes[P[u]];
	});

	handler.assureRunning();

	count numClusters = 0;
	count intraEdgesSum = 0;
	count possibleIntraEdgesSum = 0;

	values.resize(P.upperBound(), 0);

	for (index i = 0; i < clusterSizes.size(); ++i) {
		if (clusterSizes[i] > 0) {
			double id = 1;
			count possibleEdges = clusterSizes[i] * (clusterSizes[i]-1) / 2;
			if (possibleEdges > 0) {
				id = intraEdges[i] * 1.0 / possibleEdges;
			}

			values[i] = id;

			unweightedAverage += id;
			weightedAverage += id * clusterSizes[i];
			minimumValue = std::min(id, minimumValue);
			maximumValue = std::max(id, maximumValue);

			possibleIntraEdgesSum += possibleEdges;
			intraEdgesSum += intraEdges[i];
			++numClusters;
		} else {
			clusterSizes[i] = none;
		}
	}

	handler.assureRunning();

	unweightedAverage /= numClusters;
	weightedAverage /= G.numberOfNodes();

	globalValue = intraEdgesSum * 1.0 / possibleIntraEdgesSum;
	hasRun = true;
}