Example #1
0
int main(int argc, char *argv[])
{
	int i, npart;
	idxtype *part;
	float ncut=0;
	GraphType graph;
	char filename[256],outputFile[256];
	int wgtflag = 0, addSelfLoop=1, outputFileGiven=0, txtFormat=0 ;
	int randomInit = 0;
	idxtype minEdgeWeight = 0;
	Options opt;
	timer TOTALTmr, METISTmr, IOTmr;

	initOptions(&opt);

	if (argc < 2) {
		print_help(argv[0]);
		exit(0);
	}
	
	for (argv++; *argv != NULL; argv++){
	    if ((*argv)[0] == '-')
		{
			int temp;
	      	switch ((*argv)[1])
			{
			case 'b':
			case 'B':
			  opt.penalty_power=atof(*(++argv));
			  break;
			case 'i':
			case 'I':
			  opt.gamma=atof(*(++argv));
			  break;
			case 'o':
			case 'O':
			  strcpy(outputFile,*(++argv));
			  outputFileGiven=1;
			  break;
			case 'D'://quality threshold. This is a post-processing step proposed in SR-MCL. If you dont want post-processing (this is what original MLR-MCL, R-MCL, MCL do, please set "-d 0"  
			case 'd':
			  opt.quality_threshold = atof(*(++argv));
			  break;
			case 'w':
			case 'W':
		          opt.weighted_density = true; 
			  break;

			case 'c':
			case 'C':
			  opt.coarsenTo= atoi(*(++argv));
			  break;
			default:
			  printf("Invalid option %s\n", *argv);
			  print_help(argv[0]);
			  exit(0);
			}
		}
	    else
		{
	      strcpy(filename, *argv);
	    }
	}  

	if ( randomInit > 0 )
		InitRandom(time(NULL));
	else
		InitRandom(-1);

	cleartimer(TOTALTmr);
	cleartimer(METISTmr);
	cleartimer(IOTmr);

	starttimer(TOTALTmr);
	starttimer(IOTmr);

	ReadGraph(&graph, filename, &wgtflag, addSelfLoop, txtFormat);

	if ( opt.matchType == MATCH_UNSPECIFIED )
	{
//		opt.matchType = (graph.nvtxs>50000) ? MATCH_POWERLAW_FC :
//							MATCH_SHEMN;
		opt.matchType = MATCH_SHEMN;
	}
	
	stoptimer(IOTmr);

	if (graph.nvtxs <= 0) {
	  printf("Empty graph. Nothing to do.\n");
	  exit(0);
	}

	int noOfSingletons = 0; 
	GraphType *noSingletonGraph ;
	idxtype* nodeMap = lookForSingletons(&graph, &noOfSingletons);
	if ( noOfSingletons > 0 )
	{
		getSubgraph(&graph, nodeMap, graph.nvtxs-noOfSingletons, 
						wgtflag, &noSingletonGraph);
		GKfree((void**)&(graph.xadj), (void**)&(graph.adjncy), LTERM);
		if ( wgtflag&1 > 0 )
			GKfree( (void**)&(graph.adjwgt), LTERM);
//		free(graph.gdata);
		printf("Found %d singleton nodes in the", noOfSingletons);
		printf(" input graph. Removing them.\n");
	}

	if ( !outputFileGiven )
	{
		strcpy(outputFile, filename);

		sprintf(outputFile,"%s.c%d.i%1.1f.b%1.1f",outputFile,opt.coarsenTo,opt.gamma,opt.penalty_power);
	}
	
	printf("Input graph information ---------------------------------------------------\n");
	printf("  Name: %s, #Vertices: %d, #Edges: %d\n", filename, graph.nvtxs, graph.nedges/2);
	printf("Output shall be placed in the file %s\n",
	outputFile);
	fflush(stdout);

	part = idxmalloc(graph.nvtxs, "main: part");

	printf("------------------------------------------------\n");
	printf("Clustering....\n");
	fflush(stdout);
	starttimer(METISTmr);         //YK: main algorithm starts here!

	if ( noOfSingletons > 0 )
	{
		
		mlmcl(&(noSingletonGraph->nvtxs), noSingletonGraph->xadj, noSingletonGraph->adjncy,
			noSingletonGraph->vwgt,noSingletonGraph->adjwgt, &wgtflag, part, opt ); 
	}
	else	
	{
		mlmcl(&graph.nvtxs, graph.xadj, graph.adjncy,graph.vwgt,
			graph.adjwgt, &wgtflag, part, opt ); 
	}

	stoptimer(METISTmr); 

	printf("------------------------------------------------\n");
	if ( noOfSingletons > 0 )
	{
		npart=mapPartition(part,noSingletonGraph->nvtxs);
		ncut=ComputeNCut(noSingletonGraph, part,npart);
//		printf("In graph that does not include singletons,");
//		printf("No. of Clusters: %d, N-Cut value: %.2f\n",npart,ncut);


		idxtype *clusterSizes = histogram(part,
					graph.nvtxs-noOfSingletons, npart);

		int maxSize = clusterSizes[idxamax(npart, clusterSizes)];
		float avgClusterSize =
						(graph.nvtxs-noOfSingletons)*1.0/(npart);
		float balance =	(maxSize*1.0) /
				((graph.nvtxs-noOfSingletons)*1.0/npart);
		float stdDevn = stdDeviation(clusterSizes, npart);
		float avgNcut = ncut * 1.0/npart;
		float normStdDevn = stdDevn/avgClusterSize;

	// Warning: This computation only works if the singletons
	// have been placed in their own clusters. This works for
	// MLR-MCL, in other words, because it is guaranteed to
	// place singletons in their own clusters.
		printf("Output statistics for graph without singletons\n");
		printf("Clusters: %d N-Cut: %.3f", 
					npart, ncut);
		printf(" AvgN-Cut: %.3f Balance in cluster sizes: %.2f ",avgNcut,
					balance); 
		printf("Std_Deviation in cluster sizes: %.2f ", stdDevn);
		printf("Coefficient_of_Variation: %.2f\n", normStdDevn);

		free( clusterSizes );

		npart += noOfSingletons;
	//	ncut += noOfSingletons;
		printf("Output statistics for original graph\n");

		mapIndices(part, nodeMap, graph.nvtxs, npart-noOfSingletons);
	}
	else
	{
		npart=mapPartition(part,graph.nvtxs);
		ncut=ComputeNCut(&graph, part,npart);
	}

	idxtype* clusterSizes = histogram(part, graph.nvtxs, npart);
	int maxSize = clusterSizes[idxamax(npart, clusterSizes)];
	float avgClusterSize = (graph.nvtxs)*1.0/(npart);
	float balance = (maxSize*1.0)/(graph.nvtxs*1.0/npart);
	float stdDevn = stdDeviation(clusterSizes, npart);
	float avgNcut = ncut * 1.0/npart;
	float normStdDevn = stdDevn/avgClusterSize;
	
	printf("Clusters: %d N-Cut: %.3f AvgN-Cut: %.3f", npart,
						ncut, avgNcut );
	printf(" Balance in cluster sizes: %.2f Std.Deviation in cluster sizes: %.2f ",
				 balance, stdDevn);
	printf("Coefficient_of_Variation: %.2f\n", normStdDevn);

	starttimer(IOTmr);
	my_WritePartition(outputFile, part, graph.nvtxs, opt.gamma); 
	if ( noOfSingletons > 0 )
	{
		free(nodeMap);
		nodeMap = NULL;
	}

	printf("\nOutput is written to file: %s\n", outputFile);
	stoptimer(IOTmr);
	stoptimer(TOTALTmr);
	
	printf("\nTiming Information --------------------------------------------------\n");
	printf("  I/O:          \t\t %7.3f\n", gettimer(IOTmr));
	printf("  Partitioning: \t\t %7.3f   (MLR-MCL time)\n", gettimer(METISTmr));
	printf("  Total:        \t\t %7.3f\n", gettimer(TOTALTmr));
	printf("**********************************************************************\n");


	GKfree((void**)&graph.xadj, (void**)&graph.adjncy, (void**)&graph.vwgt, 
				(void**)&graph.adjwgt, (void**)&part, LTERM);
}  
/*
 * Clusters the graph given by xadj/adjncy (with optional vwgt/adjwgt) and
 * stores one cluster id per vertex in indices.
 *
 * Steps:
 *   1. Wrap the arrays in a GraphType via my_SetUpGraph().
 *   2. If opt.hubRemoval > 0, remove hubs using the threshold
 *      floor(opt.hubPct * nvtxs), then remove vertices that became
 *      singletons as a result; newIds records original-id -> reduced-id
 *      (-1 for removed vertices).
 *   3. Run mlmclWithGraph() on the (possibly reduced) graph. When
 *      opt.mis_coarsenType > 0 nothing is run, because the MIS-based path is
 *      commented out, and indices is left as the caller passed it.
 *   4. If hubs were removed, map the result back to the original numbering
 *      with mapIndices() and, when vertices were actually dropped, write
 *      the reduced graph to "input.nohubs.<hubPct>".
 *   5. Recursive re-clustering of an over-large cluster (more than 30% of
 *      the vertices). This is currently disabled: recursiveCluster is fixed
 *      at 0.
 *
 * nvtxs and wgtflag are only read through their pointers here. indices is
 * presumably sized for at least *nvtxs entries (mapIndices() is called with
 * *nvtxs) -- confirm against callers.
 *
 * NOTE(review): the GraphType that is finally clustered is not released
 * here; whether mlmclWithGraph() takes ownership is not visible in this file.
 */
void mlmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype
*vwgt, idxtype* adjwgt, int* wgtflag, idxtype* indices, Options opt)
{
	int hubRemoval=opt.hubRemoval, recursiveCluster=0;
	float hub_pct = opt.hubPct;

	GraphType *graph = (GraphType*)malloc(sizeof(GraphType));
	if ( graph == NULL )
	{
		printf("mlmcl: out of memory while allocating graph\n");
		exit(1);
	}
	// The last argument indicates we are setting up the original
	// graph
	my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt,
	*wgtflag, 1);

	/* Only meaningful (and only read) when hubRemoval > 0. */
	idxtype* newIds = NULL;
	if ( hubRemoval > 0 )
	{
		int hubThreshold = (int) floor(hub_pct * graph->nvtxs);
		GraphType *new_graph;
		newIds = removeHubs(graph, hubThreshold, *wgtflag,
						&new_graph, 0);
		free(graph->gdata);
		free(graph);
		graph = new_graph;

		// now need to remove any nodes that became singletons
		// because of hub removal.

		// we'll do another iteration of newIds, so back up
		// the old newIds. newIds_bkp is of size *nvtxs.
		idxtype *newIds_bkp = newIds;

		int noOfSingletons = 0, newIdCounter;
		newIds = lookForSingletons(graph, &noOfSingletons);
		newIdCounter = graph->nvtxs - noOfSingletons;

		if ( noOfSingletons > 0 )
		{
			printf("%d nodes became singletons due to hub removal",
						noOfSingletons );
			printf("; they will be removed.\n");
			fflush(stdout);

			getSubgraph(graph, newIds, newIdCounter, *wgtflag,
							&new_graph);
			free(graph->gdata);
			free(graph);
			graph = new_graph;

			/* Compose the two renumberings so that newIds_bkp maps an
			 * original vertex straight to its id in the final graph. */
			int i;
			for ( i=0; i<*nvtxs; i++ )
			{
				if ( newIds_bkp[i] > -1 )
				{
					newIds_bkp[i] = newIds[newIds_bkp[i]];
				}
				else
					newIds_bkp[i] = -1;
			}
			free(newIds);
		}

		newIds=newIds_bkp;
	}

	if ( opt.mis_coarsenType > 0 )
	{
		/* MIS-based coarsening path is disabled:
		 * mis_mlrmcl(graph, indices, opt); */
	}
	else
	{
		mlmclWithGraph(graph, indices, opt);
	}

	if ( hubRemoval > 0 )
	{
		int npart=mapPartition(indices, graph->nvtxs);
		float ncut=ComputeNCut(graph, indices, npart);
		printf("In graph that does not include hubs,");
		printf("No. of Clusters:%d, N-Cut value: %.2f\n", npart, ncut);

		mapIndices(indices, newIds, *nvtxs, npart);
		free(newIds);
		newIds = NULL;
		if ( *nvtxs - graph->nvtxs > 0 )
		{
			char filename[256];
			snprintf(filename, sizeof(filename), "input.nohubs.%.3f",
						hub_pct);
			WriteGraph(filename, graph->nvtxs, graph->xadj,
			graph->adjncy);
			printf("Wrote nohubs graph to %s\n", filename);
		}
	}

	if ( recursiveCluster > 0 )
	{
		int npart = mapPartition(indices, graph->nvtxs);
		float ncut = ComputeNCut(graph, indices, npart);
		printf("No. of clusters:%d, N-Cut:%.2f\n", npart, ncut);
		idxtype* hist = histogram(indices, graph->nvtxs, npart);

		/* Locate the largest cluster and its size. */
		int max=0, i=0, maxCluster=-1;
		for( i=0; i<npart; i++ )
		{
			if ( hist[i] > max )
			{
				max = hist[i];
				maxCluster = i;
			}
		}

		free(hist);

		if ( max > graph->nvtxs * 0.3 )
		{
			printf("Will recursively partition cluster of size");
			printf(" %d\n", max);

			/* subIds[i]: id of vertex i inside the extracted cluster,
			 * or -1 when vertex i is not part of it. */
			idxtype* subIds = idxmalloc(graph->nvtxs,"mlmcl:newIds");
			int newIdCounter=0;
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( indices[i] == maxCluster )
					subIds[i]=newIdCounter++;
				else
					subIds[i]=-1;
			}

			GraphType *new_graph;
			getSubgraph(graph, subIds, max, *wgtflag, &new_graph);

			idxtype *new_indices = idxmalloc(max,"mlmcl:new_indices");
			/* Scale the coarsening target with the subgraph's share of
			 * the vertices. */
			opt.coarsenTo = (int) round(((float) max
							/ (float)graph->nvtxs) * opt.coarsenTo);
			mlmcl(&max,new_graph->xadj, new_graph->adjncy,
			new_graph->vwgt, new_graph->adjwgt, wgtflag,
			new_indices, opt );

			int new_npart = mapPartition( new_indices, max);
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( subIds[i] > -1 )
				{
					/* The label belongs to parent vertex i. subIds[i]
					 * is only the position inside new_indices; using it
					 * to index indices would overwrite the labels of
					 * unrelated vertices. */
					int ni = new_indices[subIds[i]];
					if ( ni > 0 )
						indices[i] = npart + ni - 1;
					else
						indices[i] = maxCluster;
				}
			}

			printf("Recursive clustering yielded %d new",new_npart);
			printf(" clusters.");

			free(new_indices);
			free(subIds);
			free(new_graph->gdata);
			free(new_graph);

		}

	}
}