Пример #1
0
int main(int argc, char *argv[])
{
	int i, npart;
	idxtype *part;
	float ncut=0;
	GraphType graph;
	char filename[256],outputFile[256];
	int wgtflag = 0, addSelfLoop=1, outputFileGiven=0, txtFormat=0 ;
	int randomInit = 0;
	idxtype minEdgeWeight = 0;
	Options opt;
	timer TOTALTmr, METISTmr, IOTmr;

	initOptions(&opt);

	if (argc < 2) {
		print_help(argv[0]);
		exit(0);
	}
	
	for (argv++; *argv != NULL; argv++){
	    if ((*argv)[0] == '-')
		{
			int temp;
	      	switch ((*argv)[1])
			{
			case 'b':
			case 'B':
			  opt.penalty_power=atof(*(++argv));
			  break;
			case 'i':
			case 'I':
			  opt.gamma=atof(*(++argv));
			  break;
			case 'o':
			case 'O':
			  strcpy(outputFile,*(++argv));
			  outputFileGiven=1;
			  break;
			case 'D'://quality threshold. This is a post-processing step proposed in SR-MCL. If you dont want post-processing (this is what original MLR-MCL, R-MCL, MCL do, please set "-d 0"  
			case 'd':
			  opt.quality_threshold = atof(*(++argv));
			  break;
			case 'w':
			case 'W':
		          opt.weighted_density = true; 
			  break;

			case 'c':
			case 'C':
			  opt.coarsenTo= atoi(*(++argv));
			  break;
			default:
			  printf("Invalid option %s\n", *argv);
			  print_help(argv[0]);
			  exit(0);
			}
		}
	    else
		{
	      strcpy(filename, *argv);
	    }
	}  

	if ( randomInit > 0 )
		InitRandom(time(NULL));
	else
		InitRandom(-1);

	cleartimer(TOTALTmr);
	cleartimer(METISTmr);
	cleartimer(IOTmr);

	starttimer(TOTALTmr);
	starttimer(IOTmr);

	ReadGraph(&graph, filename, &wgtflag, addSelfLoop, txtFormat);

	if ( opt.matchType == MATCH_UNSPECIFIED )
	{
//		opt.matchType = (graph.nvtxs>50000) ? MATCH_POWERLAW_FC :
//							MATCH_SHEMN;
		opt.matchType = MATCH_SHEMN;
	}
	
	stoptimer(IOTmr);

	if (graph.nvtxs <= 0) {
	  printf("Empty graph. Nothing to do.\n");
	  exit(0);
	}

	int noOfSingletons = 0; 
	GraphType *noSingletonGraph ;
	idxtype* nodeMap = lookForSingletons(&graph, &noOfSingletons);
	if ( noOfSingletons > 0 )
	{
		getSubgraph(&graph, nodeMap, graph.nvtxs-noOfSingletons, 
						wgtflag, &noSingletonGraph);
		GKfree((void**)&(graph.xadj), (void**)&(graph.adjncy), LTERM);
		if ( wgtflag&1 > 0 )
			GKfree( (void**)&(graph.adjwgt), LTERM);
//		free(graph.gdata);
		printf("Found %d singleton nodes in the", noOfSingletons);
		printf(" input graph. Removing them.\n");
	}

	if ( !outputFileGiven )
	{
		strcpy(outputFile, filename);

		sprintf(outputFile,"%s.c%d.i%1.1f.b%1.1f",outputFile,opt.coarsenTo,opt.gamma,opt.penalty_power);
	}
	
	printf("Input graph information ---------------------------------------------------\n");
	printf("  Name: %s, #Vertices: %d, #Edges: %d\n", filename, graph.nvtxs, graph.nedges/2);
	printf("Output shall be placed in the file %s\n",
	outputFile);
	fflush(stdout);

	part = idxmalloc(graph.nvtxs, "main: part");

	printf("------------------------------------------------\n");
	printf("Clustering....\n");
	fflush(stdout);
	starttimer(METISTmr);         //YK: main algorithm starts here!

	if ( noOfSingletons > 0 )
	{
		
		mlmcl(&(noSingletonGraph->nvtxs), noSingletonGraph->xadj, noSingletonGraph->adjncy,
			noSingletonGraph->vwgt,noSingletonGraph->adjwgt, &wgtflag, part, opt ); 
	}
	else	
	{
		mlmcl(&graph.nvtxs, graph.xadj, graph.adjncy,graph.vwgt,
			graph.adjwgt, &wgtflag, part, opt ); 
	}

	stoptimer(METISTmr); 

	printf("------------------------------------------------\n");
	if ( noOfSingletons > 0 )
	{
		npart=mapPartition(part,noSingletonGraph->nvtxs);
		ncut=ComputeNCut(noSingletonGraph, part,npart);
//		printf("In graph that does not include singletons,");
//		printf("No. of Clusters: %d, N-Cut value: %.2f\n",npart,ncut);


		idxtype *clusterSizes = histogram(part,
					graph.nvtxs-noOfSingletons, npart);

		int maxSize = clusterSizes[idxamax(npart, clusterSizes)];
		float avgClusterSize =
						(graph.nvtxs-noOfSingletons)*1.0/(npart);
		float balance =	(maxSize*1.0) /
				((graph.nvtxs-noOfSingletons)*1.0/npart);
		float stdDevn = stdDeviation(clusterSizes, npart);
		float avgNcut = ncut * 1.0/npart;
		float normStdDevn = stdDevn/avgClusterSize;

	// Warning: This computation only works if the singletons
	// have been placed in their own clusters. This works for
	// MLR-MCL, in other words, because it is guaranteed to
	// place singletons in their own clusters.
		printf("Output statistics for graph without singletons\n");
		printf("Clusters: %d N-Cut: %.3f", 
					npart, ncut);
		printf(" AvgN-Cut: %.3f Balance in cluster sizes: %.2f ",avgNcut,
					balance); 
		printf("Std_Deviation in cluster sizes: %.2f ", stdDevn);
		printf("Coefficient_of_Variation: %.2f\n", normStdDevn);

		free( clusterSizes );

		npart += noOfSingletons;
	//	ncut += noOfSingletons;
		printf("Output statistics for original graph\n");

		mapIndices(part, nodeMap, graph.nvtxs, npart-noOfSingletons);
	}
	else
	{
		npart=mapPartition(part,graph.nvtxs);
		ncut=ComputeNCut(&graph, part,npart);
	}

	idxtype* clusterSizes = histogram(part, graph.nvtxs, npart);
	int maxSize = clusterSizes[idxamax(npart, clusterSizes)];
	float avgClusterSize = (graph.nvtxs)*1.0/(npart);
	float balance = (maxSize*1.0)/(graph.nvtxs*1.0/npart);
	float stdDevn = stdDeviation(clusterSizes, npart);
	float avgNcut = ncut * 1.0/npart;
	float normStdDevn = stdDevn/avgClusterSize;
	
	printf("Clusters: %d N-Cut: %.3f AvgN-Cut: %.3f", npart,
						ncut, avgNcut );
	printf(" Balance in cluster sizes: %.2f Std.Deviation in cluster sizes: %.2f ",
				 balance, stdDevn);
	printf("Coefficient_of_Variation: %.2f\n", normStdDevn);

	starttimer(IOTmr);
	my_WritePartition(outputFile, part, graph.nvtxs, opt.gamma); 
	if ( noOfSingletons > 0 )
	{
		free(nodeMap);
		nodeMap = NULL;
	}

	printf("\nOutput is written to file: %s\n", outputFile);
	stoptimer(IOTmr);
	stoptimer(TOTALTmr);
	
	printf("\nTiming Information --------------------------------------------------\n");
	printf("  I/O:          \t\t %7.3f\n", gettimer(IOTmr));
	printf("  Partitioning: \t\t %7.3f   (MLR-MCL time)\n", gettimer(METISTmr));
	printf("  Total:        \t\t %7.3f\n", gettimer(TOTALTmr));
	printf("**********************************************************************\n");


	GKfree((void**)&graph.xadj, (void**)&graph.adjncy, (void**)&graph.vwgt, 
				(void**)&graph.adjwgt, (void**)&part, LTERM);
}  
Пример #2
0
/*************************************************************************
* multi-level weighted kernel k-means main function
*
* Reads the graph stored in `filename`, partitions it into `nparts`
* clusters with MLKKM_PartGraphKway and returns the assignment.
*
* filename  path handed to ReadGraph
* nparts    requested number of clusters; must be at least 2
*
* Returns a Graclus value whose `part` member is the per-vertex cluster
* array allocated here (it is NOT freed by this function, so the caller
* presumably owns it -- TODO confirm) and whose `clusterNum` member is
* set to graph.nvtxs, i.e. the length of `part`.
*
* Terminates the process with status 0 when nparts < 2 or the graph is
* empty.
*
* Only graphs with graph.ncon == 1 are partitioned. For any other value
* the returned `part` array is all zeros rather than uninitialised memory.
**************************************************************************/
Graclus normalizedCut(char* filename, int nparts)
{
  Graclus ncData;
  int options[11];
  idxtype *part;  // cluster result stored in array part
  float lbvec[MAXNCON];
  GraphType graph;
  int numflag = 0, wgtflag = 0, edgecut, chain_length = 0;
  int levels = 0;
  int v;

  if (nparts < 2) 
  {
    printf("The number of partitions should be greater than 1!\n");
    exit(0);
  }

  ReadGraph(&graph, filename, &wgtflag);
  if (graph.nvtxs <= 0) 
  {
    puts("Empty graph. Nothing to do.\n");
    exit(0);
  }

  levels = amax((graph.nvtxs)/(40*log2_metis(nparts)), 20*(nparts));

  part = idxmalloc(graph.nvtxs, "main: part");
  /* Give every vertex a defined cluster id up front: the partitioner
   * below only runs for single-constraint graphs, while the statistics
   * calls and the caller read `part` unconditionally. */
  for (v = 0; v < graph.nvtxs; v++)
    part[v] = 0;

  options[0] = 0;

  if (graph.ncon == 1) 
  {
    MLKKM_PartGraphKway(&graph.nvtxs, graph.xadj, graph.adjncy, graph.vwgt, graph.adjwgt, 
			  &wgtflag, &numflag, &nparts, &chain_length, options, &edgecut, part, levels);
  }
  /* Multi-constraint graphs (ncon != 1) are not partitioned here. The
   * former else-branch only filled a scratch array that was never read
   * and could overflow for ncon > MAXNCON, so it has been dropped. */

  ComputePartitionBalance(&graph, nparts, part, lbvec);
  ComputeNCut(&graph, part, nparts);

  ncData.part = part;
  ncData.clusterNum = graph.nvtxs;

  /* `part` is deliberately left out: it is handed back through ncData. */
  GKfree((void **) &graph.xadj, (void **) &graph.adjncy, (void **) &graph.vwgt, (void **) &graph.adjwgt, LTERM);  

  return ncData;
}
Пример #3
0
/*
 * Refines a k-way partition level by level, starting at `graph` and
 * following the `finer` links until `orggraph` is reached.
 *
 * At every level pingpong() is run; its last argument is 1 only on
 * orggraph and 0 on every other level. After each non-final level the
 * cut quality is evaluated (ComputeNCut when cutType == NCUT, otherwise
 * ComputeRAsso), the level's gdata is released, and the partition is
 * projected onto the next finer graph with ProjectKWayPartition().
 *
 * If a finer graph arrives without vertex weights, unit vwgt/adjwgt
 * arrays are allocated for it and released again before returning.
 */
void MLKKMRefine(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, int chain_length, float *tpwgts, float ubfactor)
{
  int levelCount = 0;
  int allocatedWeights = 0;
  GraphType *cursor;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr));

  /* Compute the parameters of the coarsest graph */
  ComputeKWayPartitionParams(ctrl, graph, nparts);

  /* Count the levels between the coarsest graph and the original one.
   * The count is only consumed by debug output that is disabled. */
  cursor = graph;
  while (cursor != orggraph) {
    cursor = cursor->finer;
    levelCount++;
  }

  for (;;) {
    timer levelTmr;
    float levelQuality;
    const int atFinest = (graph == orggraph);

    cleartimer(levelTmr);
    starttimer(levelTmr);

    pingpong(ctrl, graph, nparts, chain_length, tpwgts, ubfactor, atFinest);
    if (atFinest)
      break;

    stoptimer(levelTmr);

    /* Per-level quality; the value is not reported anywhere. */
    if (cutType != NCUT)
      levelQuality = ComputeRAsso(graph, graph->where, nparts);
    else
      levelQuality = ComputeNCut(graph, graph->where, nparts);

    /* Deallocate the graph related arrays, then step to the finer level */
    GKfree((void **) &graph->gdata, LTERM);
    graph = graph->finer;

    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr));
    if (graph->vwgt == NULL) {
      graph->vwgt = idxsmalloc(graph->nvtxs, 1, "RefineKWay: graph->vwgt");
      graph->adjwgt = idxsmalloc(graph->nedges, 1, "RefineKWay: graph->adjwgt");
      allocatedWeights = 1;
    }
    ProjectKWayPartition(ctrl, graph, nparts);
    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr));
  }

  if (allocatedWeights)
    GKfree((void **) &graph->vwgt, (void **) &graph->adjwgt, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr));
}
/*
 * Clusters a graph given as raw arrays and writes one cluster id per
 * vertex into `indices`.
 *
 * nvtxs    pointer to the number of vertices
 * xadj, adjncy, vwgt, adjwgt
 *          graph arrays, passed unchanged to my_SetUpGraph
 * wgtflag  pointer to the weight flag, forwarded to my_SetUpGraph,
 *          removeHubs and getSubgraph
 * indices  output array; must hold at least *nvtxs entries
 * opt      options, received by value (the recursive call below modifies
 *          its own copy only)
 *
 * Steps:
 *  1. Build a GraphType from the arrays.
 *  2. If opt.hubRemoval > 0, remove hubs, then remove the vertices that
 *     became singletons, composing both id maps into `newIds`.
 *  3. Run mlmclWithGraph() unless opt.mis_coarsenType > 0, in which case
 *     nothing is run and `indices` is left untouched.
 *  4. With hub removal, map the result back to the original numbering and
 *     dump the hub-free graph to "input.nohubs.<hub_pct>".
 *  5. Optional recursive re-clustering of the largest cluster; currently
 *     disabled because recursiveCluster is fixed to 0.
 *
 * NOTE(review): the GraphType allocated here is not released before
 * returning; whether mlmclWithGraph takes ownership is not visible from
 * this function -- confirm before adding a free.
 */
void mlmcl(int* nvtxs, idxtype* xadj, idxtype* adjncy, idxtype
*vwgt, idxtype* adjwgt, int* wgtflag, idxtype* indices, Options opt)
{
	int hubRemoval=opt.hubRemoval, recursiveCluster=0;
	float hub_pct = opt.hubPct;

	GraphType *graph = (GraphType*)malloc(sizeof(GraphType));
	if ( graph == NULL )
	{
		fprintf(stderr, "mlmcl: out of memory allocating graph\n");
		exit(1);
	}
	my_SetUpGraph(graph, *nvtxs, xadj, adjncy, vwgt, adjwgt,
	*wgtflag, 1);
	// The last argument indicates we are setting up the original
	// graph 

	idxtype* newIds = NULL;
	if ( hubRemoval > 0 )
	{
		int hubThreshold = (int) floor(hub_pct * graph->nvtxs);
		GraphType *new_graph;
		newIds = removeHubs(graph, hubThreshold, *wgtflag,
						&new_graph, 0);
		free(graph->gdata);
		free(graph);
		graph = new_graph;
		
		// now need to remove any nodes that became singletons
		// because of hub removal.

		// we'll do another iteration of newIds, so back up 
		// the old newIds. newIds_bkp is of size *nvtxs.
		idxtype *newIds_bkp = newIds;

		int noOfSingletons = 0, newIdCounter;
		newIds = lookForSingletons(graph, &noOfSingletons);
		newIdCounter = graph->nvtxs - noOfSingletons;

		if ( noOfSingletons > 0 )
		{
			printf("%d nodes became singletons due to hub removal", 
						noOfSingletons );
			printf("; they will be removed.\n");
			fflush(stdout);

			getSubgraph(graph, newIds, newIdCounter, *wgtflag, 
							&new_graph);
			free(graph->gdata);
			free(graph);
			graph = new_graph;
			
			// Compose the two maps: original id -> hub-free id ->
			// singleton-free id. Removed vertices stay at -1.
			int i;
			for ( i=0; i<*nvtxs; i++ )
			{
				if ( newIds_bkp[i] > -1 )
				{
					newIds_bkp[i] = newIds[newIds_bkp[i]];
				}
				else
					newIds_bkp[i] = -1;
			}
			free(newIds);
		}

		newIds=newIds_bkp;
	}

	if ( opt.mis_coarsenType > 0 )
	{
		// MIS-based coarsening is disabled; nothing is clustered on
		// this path.
	}
	else
	{
		mlmclWithGraph(graph, indices, opt);
	}

	if ( hubRemoval > 0 )
	{
		int npart=mapPartition(indices, graph->nvtxs);
		float ncut=ComputeNCut(graph, indices, npart);
		printf("In graph that does not include hubs,"); 
		printf("No. of Clusters:%d, N-Cut value: %.2f\n", npart, ncut);

		mapIndices(indices, newIds, *nvtxs, npart);
		free(newIds);
		newIds = NULL;
		if ( *nvtxs - graph->nvtxs > 0 )
		{
			char filename[256];
			snprintf(filename, sizeof(filename),
						"input.nohubs.%.3f", hub_pct);
			WriteGraph(filename, graph->nvtxs, graph->xadj,
			graph->adjncy);
			printf("Wrote nohubs graph to %s\n", filename);
		}
	}

	if ( recursiveCluster > 0 )
	{
		int npart = mapPartition(indices, graph->nvtxs);
		float ncut = ComputeNCut(graph, indices, npart);
		printf("No. of clusters:%d, N-Cut:%.2f\n", npart, ncut);
		idxtype* hist = histogram(indices, graph->nvtxs, npart);

		int max=0, i=0, maxCluster=-1;
		for( i=0; i<npart; i++ )
		{
			if ( hist[i] > max )
			{
				max = hist[i];
				maxCluster = i;
			}
		}

		free(hist);

		if ( max > graph->nvtxs * 0.3 )
		{
			printf("Will recursively partition cluster of size");
			printf(" %d\n", max);
			
			// subIds[i] is vertex i's id inside the largest cluster,
			// or -1 when i belongs to another cluster.
			idxtype* subIds = idxmalloc(graph->nvtxs,"mlmcl:newIds");
			int newIdCounter=0;
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( indices[i] == maxCluster )
					subIds[i]=newIdCounter++;
				else
					subIds[i]=-1;
			}
			
			GraphType *new_graph;
			getSubgraph(graph, subIds, max, *wgtflag, &new_graph);

			idxtype *new_indices = idxmalloc(max,"mlmcl:new_indices");
			opt.coarsenTo = (int) round(((float) max 
							/ (float)graph->nvtxs) * opt.coarsenTo);
			mlmcl(&max,new_graph->xadj, new_graph->adjncy,
			new_graph->vwgt, new_graph->adjwgt, wgtflag,
			new_indices, opt );

			int new_npart = mapPartition( new_indices, max);
			for ( i=0; i<graph->nvtxs; i++ )
			{
				if ( subIds[i] > -1 )
				{
					// Sub-cluster 0 keeps the old cluster id; the
					// others get fresh ids after the existing ones.
					// The write targets vertex i itself: subIds[i]
					// only indexes the subgraph arrays.
					int ni = new_indices[subIds[i]];
					if ( ni > 0 )
						indices[i] = npart + ni - 1;
					else
						indices[i] = maxCluster;
				}
			}
			
			printf("Recursive clustering yielded %d new",new_npart);
			printf(" clusters.");

			free(new_indices);
			free(subIds);
			free(new_graph->gdata);
			free(new_graph);

		}

	}
}