void testCoarsening(int *nvtxs, idxtype *xadj, idxtype *adjncy,
                    idxtype *vwgt, idxtype *adjwgt,
                    int *wgtflag, int ct)
{
    GraphType graph, *cgraph;
    CtrlType ctrl;

    my_SetUpGraph(&graph, *nvtxs, xadj, adjncy, vwgt, adjwgt,
                  *wgtflag, 1);
    /* The last argument indicates we are setting up the original
     * graph */

    ctrl.CoarsenTo=ct;
    ctrl.CType=MATCH_SHEMN;

    my_AllocateWorkSpace(&ctrl,&graph);

    cgraph = Coarsen2Way(&ctrl,&graph);

    do {
        dump_graph(cgraph);
        cgraph = cgraph->finer;
    } while ( cgraph != NULL );

}
Exemple #2
0
void srmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype
*vwgt, idxtype* adjwgt, int* wgtflag, std::vector<std::set<idxtype> >& indices, Options opt)
{
	



	GraphType *graph = (GraphType*)malloc(sizeof(GraphType));
	my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt, *wgtflag, 1);


	std::vector< std::set<idxtype> > clusters = srmclWithGraph(graph, indices, opt);   //main procedure!


	//post processing: prune out clusters
	printf("start post-processing - prune clusters.\n");
	int num_clusters = clusters.size();

	double max_weight = 10000;
//printf("double max_weight = 10000;\n");
	//prune out clusters according to their clustering coefficient
#ifdef CLUSTER_COEFFICIENT
	double* cluster_coefficients = (double*) malloc(sizeof(double) * num_clusters);  
	//weighted cc is according to [B. Zhang and S. Horvath, Stat. App. Genet. Mol. Biol. 4, 17 2005.]
	for(int vIdx = 0; vIdx < *nvtxs; vIdx++){   //v1
		for(std::set<idxtype>::iterator cID = indices[vIdx].begin(); cID != indices[vIdx].end(); cID ++){ //for each cluster, calculate its clustering coefficient
			double numerator = 0;
			double denominator1 = 0;
			double denominator2 = 0;
			for(int adjIdx = xadj[vIdx]; adjIdx < xadj[vIdx+1]; adjIdx++){
				if(indices[adjncy[adjIdx]].find(*cID) != indices[adjncy[adjIdx]].end() && adjncy[adjIdx] != vIdx){  //if another node v1 is also in this cluster 
					double wij = (double)adjwgt[adjIdx] / max_weight;
					denominator1 += wij;
					denominator2 += pow(wij,2.0);
					for(int adjIdx2 = adjIdx+1; adjIdx2 < xadj[vIdx+1]; adjIdx2++){
						if(indices[adjncy[adjIdx2]].find(*cID) != indices[adjncy[adjIdx2]].end()  && adjncy[adjIdx2] != vIdx){ //if another node v2 is also in this cluster 
							double wik = (double)adjwgt[adjIdx2] / max_weight;
							for(int i=xadj[adjncy[adjIdx]]; i<xadj[adjncy[adjIdx]+1]; i++){ //find whether v1 and v2 are connected
								if(adjncy[i] == adjncy[adjIdx2]){  //if v1 and v2 are connected
									double wjk =  (double)adjwgt[i] / max_weight;
									numerator += wij*wik*wjk;
									//if(*cID == 3)
										
									break;
								}
							}
						}
					}
				}
			}
			denominator1 = pow(denominator1, 2.0);
			double denominator = denominator1 - denominator2;			
			numerator *= 2;
			double cluster_coefficient = 0; 
			if(numerator != 0)
				cluster_coefficient = numerator / denominator;
			//printf("cID:%d node:%d value:%.3f (%.3f/%.3f)\n",*cID, vIdx+1,cluster_coefficient,numerator,denominator );
			cluster_coefficients[*cID] += cluster_coefficient;
		}
	}
#endif

	//calculate weighted density


	double* num_internal_edges = (double*) malloc( sizeof(double) * num_clusters);
	for(int i=0; i<num_clusters; i++)
		num_internal_edges[i] = 0;
	for(int vID=0; vID<*nvtxs ; vID++){
		for(std::set<idxtype>::iterator cIterator = indices[vID].begin(); cIterator != indices[vID].end(); cIterator++){
			idxtype cID = *(cIterator);

			for( int j = xadj[vID]; j < xadj[vID+1]; j++ ){
				if( adjncy[j]!=vID && clusters[cID].find( adjncy[j] ) != clusters[cID].end()  ){  //contains it
					if(opt.weighted_density)
						num_internal_edges[ cID ] += (adjwgt[j] / max_weight);   //weighted version
					else
						num_internal_edges[ cID ] ++; 		
				}
			}
		}

	}

//printf("double* densities = (double*) malloc(sizeof(double) * (*nvtxs));  \n"); fflush(stdout);
	double* densities = (double*) malloc(sizeof(double) * num_clusters);  
	for(int cID = 0; cID < num_clusters ; cID++){
		int size = clusters[cID].size();
		if(size <= 1)
			densities[cID] = 0;
		else
			densities[cID] = num_internal_edges[ cID ] / size / (size-1);
	}
	free(num_internal_edges);


	int num_pruned_clusters_density = 0;	
	for(int cID = 0; cID < num_clusters ; cID++){
#ifdef CLUSTER_COEFFICIENT
		if(clusters[cID].size() > 1){
			cluster_coefficients[cID] /= clusters[cID].size();
			densities[cID] = cluster_coefficients[cID] ;
		}
#endif

#ifdef TEST_OUTPUT	
		printf("cluster %d: \tdensity:%.3f\tsize:%zu\n",cID, densities[cID], clusters[cID].size());
#endif
		if(clusters[cID].size()<=2 || densities[cID] * sqrt((double)clusters[cID].size()) < opt.quality_threshold){  //remove the cluster
		//if(clusters[cID].size()<=2 || densities[cID]  < opt.quality_threshold){  //remove the cluster
			for(std::set<idxtype>::iterator nodeIterator = clusters[cID].begin(); nodeIterator != clusters[cID].end(); nodeIterator++){
				//printf(" nodeIterator:%d\n",*nodeIterator);
				if(indices[*nodeIterator].find(cID) != indices[*nodeIterator].end()){
					//printf("  if nodeIterator:%d\n",*(indices[*nodeIterator].find(cID)));
					indices[*nodeIterator].erase(cID);
				}
				//printf(" nodeIterator:%d done\n",*nodeIterator);
			}
			num_pruned_clusters_density++;

			clusters[cID].clear();
			densities[cID] = -1;
#ifdef CLUSTER_COEFFICIENT
			cluster_coefficients [cID] = -1;
#endif
		}
	}
	printf("number of clusters pruned out since their density are smaller than %.3f:\t%d\n", opt.quality_threshold, num_pruned_clusters_density);

		
	//prune out clusters according to their redundancy (sort clusters by density * sqrt(size))
	int num_pruned_clusters_overlap = 0;
	int num_clusters_after_pruning_density = (num_clusters-num_pruned_clusters_density);
	printf("sort clusters\n");
//printf("malloc test: %d, sizeof(Cluster):%d (int:%d)\n",num_clusters_after_pruning_density,sizeof(Cluster),sizeof(int)); fflush(stdout);
	Cluster* cluster_array = (Cluster*) malloc(sizeof(Cluster)*(num_clusters_after_pruning_density ));
//printf("malloc test2: %d\n",num_clusters_after_pruning_density); fflush(stdout);
	int tempIdx = 0;
	for(int i=0; i < num_clusters; i++){
		if(densities[i] >= 0){
			Cluster c(i,clusters[i].size(),	densities[i] );
			cluster_array[tempIdx] = c;
			tempIdx++;
		}
	}

	double avg_cluster_size = 0;
	qsort(cluster_array, num_clusters_after_pruning_density  , sizeof(Cluster), compareCluster);
	for(int i=0; i<num_clusters_after_pruning_density   ; i++){
		int cID1 = cluster_array[i].cID;
		if(clusters[cID1].size() == 0)
			continue;
		//printf("examined cluster cID1: %d\n",cID1);fflush(stdout);	
		for(int j=i+1; j<num_clusters_after_pruning_density ; j++){
			
			//calculate overlap size
			int cID2 = cluster_array[j].cID;
			//printf(" examined cluster cID2: %d\n",cID2);fflush(stdout);	
			if(clusters[cID2].size() == 0)
				continue;	
			double overlap = 0;

			for ( std::set<idxtype>::iterator iterator = clusters[cID2].begin(); iterator != clusters[cID2].end(); iterator++ ){
				
				if ( clusters[cID1].find(*iterator) != clusters[cID1].end() ){
					overlap++;
				}
					
			}
			//printf("overlap: %1.0f\n",overlap);fflush(stdout);	
			//calculate neighbor affinity
			float overlapNA = pow(overlap,2) / clusters[cID1].size() / clusters[cID2].size();
			if(overlapNA >= opt.redundancy_threshold){  //remove the cluster with cID2
				
				for(std::set<idxtype>::iterator nodeIterator = clusters[cID2].begin(); nodeIterator != clusters[cID2].end(); nodeIterator++){

					indices[*nodeIterator].erase(cID2);
					
				}
				num_pruned_clusters_overlap++;
#ifdef TEST_OUTPUT
				printf(" overlapNA:%.3f, (keep cID:%d, remove cID:%d) test size:%1f, %d, %d\n",overlapNA,cID1,overlap, cID2,clusters[cID1].size(),clusters[cID2].size());fflush(stdout);
#endif
				clusters[cID2].clear();
				densities[cID2] = 0;
			}
			
		}
		avg_cluster_size += clusters[cID1].size();
	}
	printf("number of overlapped clusters pruned out since their NA are larger than %.3f:\t%d\n", opt.redundancy_threshold, num_pruned_clusters_overlap);fflush(stdout);

	num_clusters -=  (num_pruned_clusters_density + num_pruned_clusters_overlap);
	printf("*********afer prunning, total # clusters:\t%d**********\n",num_clusters);fflush(stdout);

	avg_cluster_size /= num_clusters;
	printf("*********average cluster size:\t%.3f**********\n",avg_cluster_size);fflush(stdout);

	int coverage = 0;
	for(int vID=0; vID<*nvtxs ; vID++){
		if(indices[vID].size() > 0)
			coverage++;
	}
	printf("*********coverage:\t%d**********\n",coverage);fflush(stdout);

	free(cluster_array);
	free(graph);
}
void mlmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype
*vwgt, idxtype* adjwgt, int* wgtflag, idxtype* indices, Options opt)
{
 /*	GraphType graph;
	my_SetUpGraph(&graph, *nvtxs, xadj, adjncy, vwgt, adjwgt,
	*wgtflag, 1); */
	int hubRemoval=opt.hubRemoval, recursiveCluster=0;
	float hub_pct = opt.hubPct;

	GraphType *graph = (GraphType*)malloc(sizeof(GraphType));
	my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt,
	*wgtflag, 1);
	// The last argument indicates we are setting up the original
	// graph 

	idxtype* newIds;
	if ( hubRemoval > 0 )
	{
		int hubThreshold = (int) floor(hub_pct * graph->nvtxs);
		GraphType *new_graph;
		newIds = removeHubs(graph, hubThreshold, *wgtflag,
						&new_graph, 0);
		free(graph->gdata);
		free(graph);
		graph = new_graph;
		
		// now need to remove any nodes that became singletons
		// because of hub removal.

		// we'll do another iteration of newIds, so back up 
		// the old newIds. newIds_bkp is of size *nvtxs.
		idxtype *newIds_bkp = newIds;

		int noOfSingletons = 0, newIdCounter;
		newIds = lookForSingletons(graph, &noOfSingletons);
		newIdCounter = graph->nvtxs - noOfSingletons;

		if ( noOfSingletons > 0 )
		{
			printf("%d nodes became singletons due to hub removal", 
						noOfSingletons );
			printf("; they will be removed.\n");
			fflush(stdout);

			getSubgraph(graph, newIds, newIdCounter, *wgtflag, 
							&new_graph);
			free(graph->gdata);
			free(graph);
			graph = new_graph;
			
			int i;
			for ( i=0; i<*nvtxs; i++ )
			{
				if ( newIds_bkp[i] > -1 )
				{
					newIds_bkp[i] = newIds[newIds_bkp[i]];
				}
				else
					newIds_bkp[i] = -1;
			}
			free(newIds);
		}

		newIds=newIds_bkp;
	}

//	printf("nnz:%d\n",graph.xadj[*nvtxs]);
	if ( opt.mis_coarsenType > 0 )
	{
//		mis_mlrmcl(graph, indices, opt); 
	}
	else
	{
		mlmclWithGraph(graph, indices, opt);
	}

	if ( hubRemoval > 0 )
	{
		int npart=mapPartition(indices, graph->nvtxs);
		float ncut=ComputeNCut(graph, indices, npart);
		printf("In graph that does not include hubs,"); 
		printf("No. of Clusters:%d, N-Cut value: %.2f\n", npart, ncut);

		mapIndices(indices, newIds, *nvtxs, npart);
		free(newIds);
		if ( *nvtxs - graph->nvtxs > 0 )
		{
			char filename[256];
			sprintf(filename, "input.nohubs.%.3f", hub_pct);
			WriteGraph(filename, graph->nvtxs, graph->xadj,
			graph->adjncy);
			printf("Wrote nohubs graph to %s\n", filename);
		}
	}

	if ( recursiveCluster > 0 )
	{
		int npart = mapPartition(indices, graph->nvtxs);
		float ncut = ComputeNCut(graph, indices, npart);
		printf("No. of clusters:%d, N-Cut:%.2f\n", npart, ncut);
		idxtype* hist = histogram(indices, graph->nvtxs, npart);

		int max=0, i=0, maxCluster=-1;
		for( i=0; i<npart; i++ )
		{
			if ( hist[i] > max )
			{
				max = hist[i];
				maxCluster = i;
			}
		}

		free(hist);

		if ( max > graph->nvtxs * 0.3 )
		{
			printf("Will recursively partition cluster of size");
			printf(" %d\n", max);
			
			idxtype* newIds = idxmalloc(graph->nvtxs,"mlmcl:newIds");
			int newIdCounter=0;
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( indices[i] == maxCluster )
					newIds[i]=newIdCounter++;
				else
					newIds[i]=-1;
			}
			
			GraphType *new_graph;
			getSubgraph(graph, newIds, max, *wgtflag, &new_graph);

			idxtype *new_indices = idxmalloc(max,"mlmcl:new_indices");
			opt.coarsenTo = (int) round(((float) max 
							/ (float)graph->nvtxs) * opt.coarsenTo);
			mlmcl(&max,new_graph->xadj, new_graph->adjncy,
			new_graph->vwgt, new_graph->adjwgt, wgtflag,
			new_indices, opt );

			int new_npart = mapPartition( new_indices, max);
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( newIds[i] > -1 )
				{
					int ni = new_indices[newIds[i]];
					if ( ni > 0 )
						indices[newIds[i]] = npart + ni - 1;
					else
						indices[newIds[i]] = maxCluster;
				}
			}
			
			printf("Recursive clustering yielded %d new",new_npart);
			printf(" clusters.");

			free(new_indices);
			free(newIds);
			free(new_graph->gdata);
			free(new_graph);

		}

	}
}