void testCoarsening(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int ct) { GraphType graph, *cgraph; CtrlType ctrl; my_SetUpGraph(&graph, *nvtxs, xadj, adjncy, vwgt, adjwgt, *wgtflag, 1); /* The last argument indicates we are setting up the original * graph */ ctrl.CoarsenTo=ct; ctrl.CType=MATCH_SHEMN; my_AllocateWorkSpace(&ctrl,&graph); cgraph = Coarsen2Way(&ctrl,&graph); do { dump_graph(cgraph); cgraph = cgraph->finer; } while ( cgraph != NULL ); }
void srmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype *vwgt, idxtype* adjwgt, int* wgtflag, std::vector<std::set<idxtype> >& indices, Options opt) { GraphType *graph = (GraphType*)malloc(sizeof(GraphType)); my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt, *wgtflag, 1); std::vector< std::set<idxtype> > clusters = srmclWithGraph(graph, indices, opt); //main procedure! //post processing: prune out clusters printf("start post-processing - prune clusters.\n"); int num_clusters = clusters.size(); double max_weight = 10000; //printf("double max_weight = 10000;\n"); //prune out clusters according to their clustering coefficient #ifdef CLUSTER_COEFFICIENT double* cluster_coefficients = (double*) malloc(sizeof(double) * num_clusters); //weighted cc is according to [B. Zhang and S. Horvath, Stat. App. Genet. Mol. Biol. 4, 17 2005.] for(int vIdx = 0; vIdx < *nvtxs; vIdx++){ //v1 for(std::set<idxtype>::iterator cID = indices[vIdx].begin(); cID != indices[vIdx].end(); cID ++){ //for each cluster, calculate its clustering coefficient double numerator = 0; double denominator1 = 0; double denominator2 = 0; for(int adjIdx = xadj[vIdx]; adjIdx < xadj[vIdx+1]; adjIdx++){ if(indices[adjncy[adjIdx]].find(*cID) != indices[adjncy[adjIdx]].end() && adjncy[adjIdx] != vIdx){ //if another node v1 is also in this cluster double wij = (double)adjwgt[adjIdx] / max_weight; denominator1 += wij; denominator2 += pow(wij,2.0); for(int adjIdx2 = adjIdx+1; adjIdx2 < xadj[vIdx+1]; adjIdx2++){ if(indices[adjncy[adjIdx2]].find(*cID) != indices[adjncy[adjIdx2]].end() && adjncy[adjIdx2] != vIdx){ //if another node v2 is also in this cluster double wik = (double)adjwgt[adjIdx2] / max_weight; for(int i=xadj[adjncy[adjIdx]]; i<xadj[adjncy[adjIdx]+1]; i++){ //find whether v1 and v2 are connected if(adjncy[i] == adjncy[adjIdx2]){ //if v1 and v2 are connected double wjk = (double)adjwgt[i] / max_weight; numerator += wij*wik*wjk; //if(*cID == 3) break; } } } } } } denominator1 = pow(denominator1, 2.0); double denominator = denominator1 - denominator2; numerator *= 2; double cluster_coefficient = 0; if(numerator != 0) cluster_coefficient = numerator / denominator; //printf("cID:%d node:%d value:%.3f (%.3f/%.3f)\n",*cID, vIdx+1,cluster_coefficient,numerator,denominator ); cluster_coefficients[*cID] += cluster_coefficient; } } #endif //calculate weighted density double* num_internal_edges = (double*) malloc( sizeof(double) * num_clusters); for(int i=0; i<num_clusters; i++) num_internal_edges[i] = 0; for(int vID=0; vID<*nvtxs ; vID++){ for(std::set<idxtype>::iterator cIterator = indices[vID].begin(); cIterator != indices[vID].end(); cIterator++){ idxtype cID = *(cIterator); for( int j = xadj[vID]; j < xadj[vID+1]; j++ ){ if( adjncy[j]!=vID && clusters[cID].find( adjncy[j] ) != clusters[cID].end() ){ //contains it if(opt.weighted_density) num_internal_edges[ cID ] += (adjwgt[j] / max_weight); //weighted version else num_internal_edges[ cID ] ++; } } } } //printf("double* densities = (double*) malloc(sizeof(double) * (*nvtxs)); \n"); fflush(stdout); double* densities = (double*) malloc(sizeof(double) * num_clusters); for(int cID = 0; cID < num_clusters ; cID++){ int size = clusters[cID].size(); if(size <= 1) densities[cID] = 0; else densities[cID] = num_internal_edges[ cID ] / size / (size-1); } free(num_internal_edges); int num_pruned_clusters_density = 0; for(int cID = 0; cID < num_clusters ; cID++){ #ifdef CLUSTER_COEFFICIENT if(clusters[cID].size() > 1){ cluster_coefficients[cID] /= clusters[cID].size(); densities[cID] = cluster_coefficients[cID] ; } #endif #ifdef TEST_OUTPUT printf("cluster %d: \tdensity:%.3f\tsize:%zu\n",cID, densities[cID], clusters[cID].size()); #endif if(clusters[cID].size()<=2 || densities[cID] * sqrt((double)clusters[cID].size()) < opt.quality_threshold){ //remove the cluster //if(clusters[cID].size()<=2 || densities[cID] < opt.quality_threshold){ //remove the cluster for(std::set<idxtype>::iterator nodeIterator = clusters[cID].begin(); nodeIterator != clusters[cID].end(); nodeIterator++){ //printf(" nodeIterator:%d\n",*nodeIterator); if(indices[*nodeIterator].find(cID) != indices[*nodeIterator].end()){ //printf(" if nodeIterator:%d\n",*(indices[*nodeIterator].find(cID))); indices[*nodeIterator].erase(cID); } //printf(" nodeIterator:%d done\n",*nodeIterator); } num_pruned_clusters_density++; clusters[cID].clear(); densities[cID] = -1; #ifdef CLUSTER_COEFFICIENT cluster_coefficients [cID] = -1; #endif } } printf("number of clusters pruned out since their density are smaller than %.3f:\t%d\n", opt.quality_threshold, num_pruned_clusters_density); //prune out clusters according to their redundancy (sort clusters by density * sqrt(size)) int num_pruned_clusters_overlap = 0; int num_clusters_after_pruning_density = (num_clusters-num_pruned_clusters_density); printf("sort clusters\n"); //printf("malloc test: %d, sizeof(Cluster):%d (int:%d)\n",num_clusters_after_pruning_density,sizeof(Cluster),sizeof(int)); fflush(stdout); Cluster* cluster_array = (Cluster*) malloc(sizeof(Cluster)*(num_clusters_after_pruning_density )); //printf("malloc test2: %d\n",num_clusters_after_pruning_density); fflush(stdout); int tempIdx = 0; for(int i=0; i < num_clusters; i++){ if(densities[i] >= 0){ Cluster c(i,clusters[i].size(), densities[i] ); cluster_array[tempIdx] = c; tempIdx++; } } double avg_cluster_size = 0; qsort(cluster_array, num_clusters_after_pruning_density , sizeof(Cluster), compareCluster); for(int i=0; i<num_clusters_after_pruning_density ; i++){ int cID1 = cluster_array[i].cID; if(clusters[cID1].size() == 0) continue; //printf("examined cluster cID1: %d\n",cID1);fflush(stdout); for(int j=i+1; j<num_clusters_after_pruning_density ; j++){ //calculate overlap size int cID2 = cluster_array[j].cID; //printf(" examined cluster cID2: %d\n",cID2);fflush(stdout); if(clusters[cID2].size() == 0) continue; double overlap = 0; for ( std::set<idxtype>::iterator iterator = clusters[cID2].begin(); iterator != clusters[cID2].end(); iterator++ ){ if ( clusters[cID1].find(*iterator) != clusters[cID1].end() ){ overlap++; } } //printf("overlap: %1.0f\n",overlap);fflush(stdout); //calculate neighbor affinity float overlapNA = pow(overlap,2) / clusters[cID1].size() / clusters[cID2].size(); if(overlapNA >= opt.redundancy_threshold){ //remove the cluster with cID2 for(std::set<idxtype>::iterator nodeIterator = clusters[cID2].begin(); nodeIterator != clusters[cID2].end(); nodeIterator++){ indices[*nodeIterator].erase(cID2); } num_pruned_clusters_overlap++; #ifdef TEST_OUTPUT printf(" overlapNA:%.3f, (keep cID:%d, remove cID:%d) test size:%1f, %d, %d\n",overlapNA,cID1,overlap, cID2,clusters[cID1].size(),clusters[cID2].size());fflush(stdout); #endif clusters[cID2].clear(); densities[cID2] = 0; } } avg_cluster_size += clusters[cID1].size(); } printf("number of overlapped clusters pruned out since their NA are larger than %.3f:\t%d\n", opt.redundancy_threshold, num_pruned_clusters_overlap);fflush(stdout); num_clusters -= (num_pruned_clusters_density + num_pruned_clusters_overlap); printf("*********afer prunning, total # clusters:\t%d**********\n",num_clusters);fflush(stdout); avg_cluster_size /= num_clusters; printf("*********average cluster size:\t%.3f**********\n",avg_cluster_size);fflush(stdout); int coverage = 0; for(int vID=0; vID<*nvtxs ; vID++){ if(indices[vID].size() > 0) coverage++; } printf("*********coverage:\t%d**********\n",coverage);fflush(stdout); free(cluster_array); free(graph); }
void mlmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype *vwgt, idxtype* adjwgt, int* wgtflag, idxtype* indices, Options opt) { /* GraphType graph; my_SetUpGraph(&graph, *nvtxs, xadj, adjncy, vwgt, adjwgt, *wgtflag, 1); */ int hubRemoval=opt.hubRemoval, recursiveCluster=0; float hub_pct = opt.hubPct; GraphType *graph = (GraphType*)malloc(sizeof(GraphType)); my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt, *wgtflag, 1); // The last argument indicates we are setting up the original // graph idxtype* newIds; if ( hubRemoval > 0 ) { int hubThreshold = (int) floor(hub_pct * graph->nvtxs); GraphType *new_graph; newIds = removeHubs(graph, hubThreshold, *wgtflag, &new_graph, 0); free(graph->gdata); free(graph); graph = new_graph; // now need to remove any nodes that became singletons // because of hub removal. // we'll do another iteration of newIds, so back up // the old newIds. newIds_bkp is of size *nvtxs. idxtype *newIds_bkp = newIds; int noOfSingletons = 0, newIdCounter; newIds = lookForSingletons(graph, &noOfSingletons); newIdCounter = graph->nvtxs - noOfSingletons; if ( noOfSingletons > 0 ) { printf("%d nodes became singletons due to hub removal", noOfSingletons ); printf("; they will be removed.\n"); fflush(stdout); getSubgraph(graph, newIds, newIdCounter, *wgtflag, &new_graph); free(graph->gdata); free(graph); graph = new_graph; int i; for ( i=0; i<*nvtxs; i++ ) { if ( newIds_bkp[i] > -1 ) { newIds_bkp[i] = newIds[newIds_bkp[i]]; } else newIds_bkp[i] = -1; } free(newIds); } newIds=newIds_bkp; } // printf("nnz:%d\n",graph.xadj[*nvtxs]); if ( opt.mis_coarsenType > 0 ) { // mis_mlrmcl(graph, indices, opt); } else { mlmclWithGraph(graph, indices, opt); } if ( hubRemoval > 0 ) { int npart=mapPartition(indices, graph->nvtxs); float ncut=ComputeNCut(graph, indices, npart); printf("In graph that does not include hubs,"); printf("No. of Clusters:%d, N-Cut value: %.2f\n", npart, ncut); mapIndices(indices, newIds, *nvtxs, npart); free(newIds); if ( *nvtxs - graph->nvtxs > 0 ) { char filename[256]; sprintf(filename, "input.nohubs.%.3f", hub_pct); WriteGraph(filename, graph->nvtxs, graph->xadj, graph->adjncy); printf("Wrote nohubs graph to %s\n", filename); } } if ( recursiveCluster > 0 ) { int npart = mapPartition(indices, graph->nvtxs); float ncut = ComputeNCut(graph, indices, npart); printf("No. of clusters:%d, N-Cut:%.2f\n", npart, ncut); idxtype* hist = histogram(indices, graph->nvtxs, npart); int max=0, i=0, maxCluster=-1; for( i=0; i<npart; i++ ) { if ( hist[i] > max ) { max = hist[i]; maxCluster = i; } } free(hist); if ( max > graph->nvtxs * 0.3 ) { printf("Will recursively partition cluster of size"); printf(" %d\n", max); idxtype* newIds = idxmalloc(graph->nvtxs,"mlmcl:newIds"); int newIdCounter=0; for ( i=0; i<graph->nvtxs; i++ ) { if ( indices[i] == maxCluster ) newIds[i]=newIdCounter++; else newIds[i]=-1; } GraphType *new_graph; getSubgraph(graph, newIds, max, *wgtflag, &new_graph); idxtype *new_indices = idxmalloc(max,"mlmcl:new_indices"); opt.coarsenTo = (int) round(((float) max / (float)graph->nvtxs) * opt.coarsenTo); mlmcl(&max,new_graph->xadj, new_graph->adjncy, new_graph->vwgt, new_graph->adjwgt, wgtflag, new_indices, opt ); int new_npart = mapPartition( new_indices, max); for ( i=0; i<graph->nvtxs; i++ ) { if ( newIds[i] > -1 ) { int ni = new_indices[newIds[i]]; if ( ni > 0 ) indices[newIds[i]] = npart + ni - 1; else indices[newIds[i]] = maxCluster; } } printf("Recursive clustering yielded %d new",new_npart); printf(" clusters."); free(new_indices); free(newIds); free(new_graph->gdata); free(new_graph); } } }