/**
 * Agglomeratively clusters the rows of PSMatrix and reports the three largest
 * clusters.
 *
 * Every row starts as its own cluster. As long as the smallest pairwise
 * jaccardDist() between two still-active clusters is below 1, those two
 * clusters are merged: their membership masks are OR-ed together and both
 * PSMatrix rows are overwritten with `PSMatrix[a] & PSMatrix[b]`.
 * NOTE: PSMatrix is therefore modified in place by this call.
 *
 * @param result  Output. Cleared and then resized to exactly three membership
 *                masks of length line_count; result[k][j] is non-zero iff row
 *                j belongs to the k-th largest cluster (ties are resolved in
 *                favour of the lower cluster index). When fewer than three
 *                non-empty clusters exist, the remaining masks are all zero.
 */
void JLinkage::clusterPSMatrix(std::vector<std::vector<int> >& result) {
    /* allocate memory */
    std::vector<std::vector<double> > distances(line_count, std::vector<double>(line_count, 0));
    std::vector<std::vector<int> > clusters(line_count, std::vector<int>(line_count, 0));
    std::vector<int> indicators(line_count, 1);  // 1 = cluster still active, 0 = merged away

    /* initialization: singleton clusters, full distance matrix, closest pair */
    double minDis = 1;
    int indexA = 0, indexB = 0;
    for (int i = 0; i < line_count; i++) {
        clusters[i][i] = 1;
        for (int j = i + 1; j < line_count; j++) {
            const double jd = jaccardDist(PSMatrix[i], PSMatrix[j]);
            distances[i][j] = jd;
            distances[j][i] = jd;
            if (jd < minDis) {
                minDis = jd;
                indexA = i;
                indexB = j;
            }
        }
    }

    // minDis starts at 1 and is only ever replaced by strictly smaller values,
    // so "< 1.0" is the same condition as the exact comparison "!= 1".
    while (minDis < 1.0) {
        /* merge two clusters: both masks become the union of their members */
        for (int i = 0; i < line_count; i++) {
            if (clusters[indexA][i] || clusters[indexB][i])
                clusters[indexA][i] = clusters[indexB][i] = 1;
        }
        indicators[indexB] = 0;

        // Both rows receive the intersection. The assignment is idempotent, so
        // it is executed once instead of once per model.
        PSMatrix[indexA] = PSMatrix[indexB] = PSMatrix[indexA] & PSMatrix[indexB];

        /* recalculate distance from the merged cluster to every row */
        for (int i = 0; i < line_count; i++) {
            distances[indexA][i] = jaccardDist(PSMatrix[indexA], PSMatrix[i]);
            distances[i][indexA] = distances[indexA][i];
        }

        /* find minimum distance among the clusters that are still active */
        minDis = 1;
        for (int i = 0; i < line_count; i++) {
            if (indicators[i] == 0) continue;
            for (int j = i + 1; j < line_count; j++) {
                if (indicators[j] == 0) continue;
                if (distances[i][j] < minDis) {
                    minDis = distances[i][j];
                    indexA = i;
                    indexB = j;
                }
            }
        }
    }

    /* calculate cluster size (merged-away clusters count as size 0) */
    std::vector<int> clusterSizes(line_count);
    for (int i = 0; i < line_count; i++) {
        int cs = 0;
        if (indicators[i]) {
            for (int j = 0; j < line_count; j++) {
                if (clusters[i][j]) ++cs;
            }
        }
        clusterSizes[i] = cs;
    }

    const int cluster_num = 3;
    result.clear();
    result.resize(cluster_num, std::vector<int>(line_count));

    /* choose the largest three clusters */
    for (int count = 0; count < cluster_num; count++) {
        // Start from "nothing found" so that an empty input is never indexed
        // and a size-0 (inactive or already chosen) cluster is never selected.
        int max_index = -1;
        int max_size = 0;
        for (int i = 0; i < line_count; i++) {
            if (max_size < clusterSizes[i]) {
                max_size = clusterSizes[i];
                max_index = i;
            }
        }
        if (max_index < 0) break;  // no non-empty cluster left; remaining masks stay zero

        result[count] = clusters[max_index];
        clusterSizes[max_index] = 0;  // exclude from the next round
    }

    /* print clusters (debug aid, disabled) */
    // for (int i = 0; i < cluster_num; i++) {
    //     printf("Cluster %d:\n", i);
    //     for (int j = 0; j < line_count; j++) {
    //         if (result[i][j]) printf("%d ", j);
    //     }
    //     printf("\n");
    // }
}
void NetworKit::IntrapartitionDensity::run() { hasRun = false; Aux::SignalHandler handler; minimumValue = std::numeric_limits< double >::max(); maximumValue = std::numeric_limits< double >::lowest(); unweightedAverage = 0; weightedAverage = 0; values.clear(); std::vector<count> clusterSizes(P.upperBound(), 0); std::vector<count> intraEdges(P.upperBound(), 0); handler.assureRunning(); G.forEdges([&](node u, node v) { if (P[u] == P[v]) { ++intraEdges[P[u]]; } }); handler.assureRunning(); G.forNodes([&](node u) { ++clusterSizes[P[u]]; }); handler.assureRunning(); count numClusters = 0; count intraEdgesSum = 0; count possibleIntraEdgesSum = 0; values.resize(P.upperBound(), 0); for (index i = 0; i < clusterSizes.size(); ++i) { if (clusterSizes[i] > 0) { double id = 1; count possibleEdges = clusterSizes[i] * (clusterSizes[i]-1) / 2; if (possibleEdges > 0) { id = intraEdges[i] * 1.0 / possibleEdges; } values[i] = id; unweightedAverage += id; weightedAverage += id * clusterSizes[i]; minimumValue = std::min(id, minimumValue); maximumValue = std::max(id, maximumValue); possibleIntraEdgesSum += possibleEdges; intraEdgesSum += intraEdges[i]; ++numClusters; } else { clusterSizes[i] = none; } } handler.assureRunning(); unweightedAverage /= numClusters; weightedAverage /= G.numberOfNodes(); globalValue = intraEdgesSum * 1.0 / possibleIntraEdgesSum; hasRun = true; }