Пример #1
0
/*---< main() >-------------------------------------------------------------*/
/*
 * Command-line driver for one sequential k-means run.
 *
 * Options handled by the switch below:
 *   -i file    input data file (required)
 *   -b         the input file is binary
 *   -n num     number of clusters (values <= 1 are rejected via usage())
 *   -t value   threshold forwarded to seq_kmeans() (default 0.001)
 *   -o         print I/O and computation timings at the end
 *   -d         set the _debug flag
 * The option string also accepts "p:" and "a"; they have no case of their
 * own, so they fall into the default branch and call usage().
 *
 * Flow: read the objects, run seq_kmeans(), write the result with
 * file_write(), release everything, optionally print the timings.
 *
 * NOTE(review): nothing here verifies numClusters <= numObjs; whether
 * seq_kmeans() copes with that is not visible from this function.
 */
int main(int argc, char **argv) {
           int     opt;
    extern char   *optarg;
           int     isBinaryFile, is_output_timing;

           int     numClusters, numCoords, numObjs;
           int    *membership;    /* [numObjs] */
           char   *filename;
           float **objects;       /* [numObjs][numCoords] data objects */
           float **clusters;      /* [numClusters][numCoords] cluster center */
           float   threshold;
           double  timing            = 0.0;
           double  io_timing         = 0.0;
           double  clustering_timing = 0.0;

    /* some default values */
    _debug           = 0;
    threshold        = 0.001;
    numClusters      = 0;
    isBinaryFile     = 0;
    is_output_timing = 0;
    filename         = NULL;

    /* getopt() signals the end of the options with -1 (POSIX), not EOF */
    while ( (opt=getopt(argc,argv,"p:i:n:t:abdo")) != -1) {
        switch (opt) {
            case 'i': filename=optarg;
                      break;
            case 'b': isBinaryFile = 1;
                      break;
            case 't': threshold=atof(optarg);
                      break;
            case 'n': numClusters = atoi(optarg);
                      break;
            case 'o': is_output_timing = 1;
                      break;
            case 'd': _debug = 1;
                      break;
            case '?': /* unknown option or missing argument */
            default: usage(argv[0], threshold);
                      break;
        }
    }

    if (filename == NULL || numClusters <= 1) usage(argv[0], threshold);

    if (is_output_timing) io_timing = wtime();

    /* read data points from file ------------------------------------------*/
    objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
    if (objects == NULL) exit(1);

    if (is_output_timing) {
        timing            = wtime();
        io_timing         = timing - io_timing;
        clustering_timing = timing;
    }

    /* start the timer for the core computation -----------------------------*/
    /* membership: the cluster id for each data object */
    membership = malloc((size_t)numObjs * sizeof *membership);
    if (membership == NULL) {
        /* explicit check: an assert() would vanish when built with NDEBUG */
        fprintf(stderr, "Error: cannot allocate membership for %d objects\n",
                numObjs);
        exit(1);
    }

    clusters = seq_kmeans(objects, numCoords, numObjs, numClusters, threshold,
                          membership);

    /* objects is released as one data block (objects[0]) plus the row table */
    free(objects[0]);
    free(objects);

    if (is_output_timing) {
        timing            = wtime();
        clustering_timing = timing - clustering_timing;
    }

    /* output: the coordinates of the cluster centres ----------------------*/
    file_write(filename, numClusters, numObjs, numCoords, clusters,
               membership);

    free(membership);
    free(clusters[0]);
    free(clusters);

    /*---- output performance numbers ---------------------------------------*/
    if (is_output_timing) {
        /* the time spent in file_write() counts as I/O as well */
        io_timing += wtime() - timing;
        printf("\nPerforming **** Regular Kmeans (sequential version) ****\n");

        printf("Input file:     %s\n", filename);
        printf("numObjs       = %d\n", numObjs);
        printf("numCoords     = %d\n", numCoords);
        printf("numClusters   = %d\n", numClusters);
        printf("threshold     = %.4f\n", threshold);

        printf("I/O time           = %10.4f sec\n", io_timing);
        printf("Computation timing = %10.4f sec\n", clustering_timing);
    }

    return(0);
}
Пример #2
0
/*
 * Allocation helpers for calculate_clustering_cost().  The routine has no
 * error channel apart from its float result, so running out of memory is
 * treated as fatal instead of being silently dereferenced.
 */
static void *cost_xmalloc(size_t bytes)
{
    void *mem = malloc(bytes != 0 ? bytes : 1);
    if (mem == NULL) {
        fprintf(stderr, "calculate_clustering_cost: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

static void *cost_xrealloc(void *old, size_t bytes)
{
    void *mem = realloc(old, bytes != 0 ? bytes : 1);
    if (mem == NULL) {
        fprintf(stderr, "calculate_clustering_cost: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Computes a clustering cost for a square dependency matrix.
 *
 * InputDsm  size x size matrix; every non-zero cell (i, j) is a dependency.
 *           It is only read, never modified.
 * size      number of rows/columns.
 *
 * Steps:
 *   1. "Buses": rows whose share of non-zero cells (in percent) exceeds a
 *      threshold alpha.  Alpha is lowered from 40 to 10 until the number of
 *      buses exceeds delta = (size*15)/100; then the previous alpha is
 *      recomputed and kept.  If the very first alpha already exceeds delta,
 *      or no alpha ever does, the result of that last pass is kept.
 *   2. Dependencies located in bus rows are dropped; the remaining ones form
 *      the "projected" list (row in coordsProjected, column in
 *      coordsProjectedX).
 *   3. The projected rows are clustered in one dimension with seq_kmeans()
 *      for k in {2,3,4,6,8,12,16,20}.  For each k the sum of squared
 *      distances to the (truncated) centroid is computed; a k is adopted when
 *      it lowers the SSE, and the search stops once the improvement
 *      sseBEST - sse drops below sseBEST*THH.
 *   4. A dependency stays in its k-means cluster only when both its row and
 *      its column fall between the midpoints to the neighbouring sorted
 *      centroids; otherwise it goes to the extra list at index numClusters.
 *   5. Cost: 1 for a dependency whose row is a bus, clusterSize^LAMD for a
 *      dependency inside a cluster, size^LAMD for one in the extra list.
 *
 * Returns the cost, or -1 when a tried k exceeds the number of projected
 * dependencies (also for a NULL matrix or size <= 0).
 * NOTE(review): as in the original, the -1 is returned even if an earlier k
 * already produced a clustering - confirm that this is intended.
 *
 * isInList(), add_element(), getIndexCluster(), seq_kmeans(), THH and LAMD
 * are defined outside this block and are used with the same arguments as
 * before.
 */
float calculate_clustering_cost(int **InputDsm, int size)
{
    static const int alphas[] = {40, 35, 30, 25, 20, 15, 10};
    static const int ks[]     = {2, 3, 4, 6, 8, 12, 16, 20};
    const int numAlphas = (int)(sizeof alphas / sizeof alphas[0]);
    const int numKs     = (int)(sizeof ks / sizeof ks[0]);

    float ans      = -1.0f;
    int   finished = 0;
    int   i, j, row, aa, k;

    int *rowCount         = NULL;   /* non-zero cells per row            */
    int *buses            = NULL;   /* row indices classified as buses   */
    int *coordsX          = NULL;   /* row of every dependency           */
    int *coordsY          = NULL;   /* column of every dependency        */
    int *coordsProjected  = NULL;   /* rows of the non-bus dependencies  */
    int *coordsProjectedX = NULL;   /* columns of the same dependencies  */
    int  busesSize = 0, coordsSize = 0, coordsProjectedSize = 0;

    int **clusterBEST     = NULL;   /* best clustering found so far      */
    int  *clusterSizeBEST = NULL;
    int   bestRows        = 0;      /* rows currently owned by clusterBEST */
    int   sseBEST         = -1;
    int   numClustersBEST = 0;

    double delta;
    float  unitPct;
    int    last = 0, goBack = 0;
    float  cost = 0;

    if (InputDsm == NULL || size <= 0)
        return ans;

    /* STEP 1. GET VERTICAL BUSES ------------------------------------------*/
    /* Count the dependencies of every row once, so each alpha pass can use
     * the count of the row it is looking at. */
    rowCount = cost_xmalloc((size_t)size * sizeof *rowCount);
    for (i = 0; i < size; i++) {
        rowCount[i] = 0;
        for (j = 0; j < size; j++) {
            if (InputDsm[i][j] != 0)
                rowCount[i] += 1;
        }
        coordsSize += rowCount[i];
    }

    coordsX = cost_xmalloc((size_t)coordsSize * sizeof *coordsX);
    coordsY = cost_xmalloc((size_t)coordsSize * sizeof *coordsY);
    {
        int filled = 0;
        for (i = 0; i < size; i++) {
            for (j = 0; j < size; j++) {
                if (InputDsm[i][j] != 0) {
                    coordsX[filled] = i;
                    coordsY[filled] = j;
                    filled += 1;
                }
            }
        }
    }

    /* at most every row is a bus, so no growing is needed */
    buses = cost_xmalloc((size_t)size * sizeof *buses);

    /* Calibration with different alpha values */
    delta   = (size * 15) / 100;          /* integer division, as before */
    unitPct = (float)100 / (float)size;   /* percentage of one cell      */
    for (aa = 0; aa < numAlphas; aa++) {
        int alpha = alphas[aa];

        busesSize = 0;
        for (i = 0; i < size; i++) {
            if (rowCount[i] != 0) {
                float pct = unitPct * (float)rowCount[i];
                if (pct > alpha) {
                    buses[busesSize] = i;
                    busesSize += 1;
                }
            }
        }
        printf("num buses: %i with alpha :%i last:%i\n", busesSize, alpha, last);

        if (goBack)
            break;                /* this pass was the recomputation */
        if (busesSize > delta) {
            /* with the biggest alpha there is already a significant amount
             * of buses: keep it */
            if (aa == 0)
                break;
            /* go back and recompute the buses for the previous alpha */
            aa -= 2;
            goBack = 1;
        } else if (aa == numAlphas - 1) {
            /* last possibility: stay with the result of the smallest alpha */
            break;
        } else {
            last = busesSize;
        }
    }

    /* STEP 2. REMOVE DEPENDENCIES RELATED TO BUSES ------------------------*/
    /* Rows and columns are filtered together so that entry n of both
     * projected arrays always describes the same dependency. */
    coordsProjected  = cost_xmalloc((size_t)coordsSize * sizeof *coordsProjected);
    coordsProjectedX = cost_xmalloc((size_t)coordsSize * sizeof *coordsProjectedX);
    for (i = 0; i < coordsSize; i++) {
        if ((int)isInList(coordsX[i], buses, busesSize) == 1)
            continue;
        coordsProjected[coordsProjectedSize]  = coordsX[i];
        coordsProjectedX[coordsProjectedSize] = coordsY[i];
        coordsProjectedSize += 1;
    }

    printf("num buses: %i\n", busesSize);
    printf("num coords: %i\n", coordsSize);

    /* STEP 3. FIND CLUSTERS FROM DEPENDENCIES -----------------------------*/
    for (k = 0; k < numKs; k++) {
        int     numClusters = ks[k];
        float **obj;
        float **clusters;
        int    *membership;
        float  *sortedCent;
        int   **cluster;
        int    *clusterSize;
        int     sortSize = 0;
        int     sse = 0;
        int     skip = 0;
        int     rr;

        if (numClusters > coordsProjectedSize)
            goto cleanup;         /* ans is still -1 */

        /* one coordinate per object: the row of the dependency */
        obj    = cost_xmalloc((size_t)coordsProjectedSize * sizeof *obj);
        obj[0] = cost_xmalloc((size_t)coordsProjectedSize * sizeof **obj);
        for (rr = 0; rr < coordsProjectedSize; rr++) {
            obj[rr]    = obj[0] + rr;
            obj[rr][0] = coordsProjected[rr];
        }

        membership = cost_xmalloc((size_t)coordsProjectedSize * sizeof *membership);
        clusters = seq_kmeans(obj, 1, coordsProjectedSize, numClusters, 0.001, membership);

        /* STEP 4. SORT ASCENDING THE CENTROIDS */
        sortedCent = cost_xmalloc((size_t)numClusters * sizeof *sortedCent);
        for (i = 0; i < numClusters; i++)
            sortedCent[i] = -1;
        for (i = 0; i < numClusters; i++) {
            int pos = sortSize;   /* append unless a larger entry is found */
            for (j = 0; j < sortSize; j++) {
                if (clusters[i][0] < sortedCent[j]) {
                    pos = j;
                    break;
                }
            }
            add_element(sortedCent, sortSize, clusters[i][0], pos);
            sortSize += 1;
        }

        /* Assign the dependencies to the identified clusters; index
         * numClusters collects the ones that fit no cluster. */
        cluster     = cost_xmalloc((size_t)(numClusters + 1) * sizeof *cluster);
        clusterSize = cost_xmalloc((size_t)(numClusters + 1) * sizeof *clusterSize);
        for (row = 0; row < numClusters + 1; row++) {
            cluster[row]     = cost_xmalloc(sizeof **cluster);
            clusterSize[row] = 0;
        }

        for (i = 0; i < coordsProjectedSize; i++) {
            int   clNum    = membership[i];
            float centroid = clusters[clNum][0];
            int   indx     = getIndexCluster(sortedCent, numClusters, centroid);
            float lastCentroid = 0.0f;
            float nextCentroid = size;
            float limitA, limitB;
            int   depRow = coordsProjected[i];
            int   depCol = coordsProjectedX[i];
            int   dest, centroidRow, dis;

            if (indx != 0 && indx != -1)
                lastCentroid = sortedCent[indx - 1];
            if (indx != numClusters - 1 && indx != -1)
                nextCentroid = sortedCent[indx + 1];

            if (lastCentroid > centroid)
                printf("********LIST NOT ORDENED!!!");

            /* the cluster spans from the midpoint towards the previous
             * centroid to the midpoint towards the next one */
            if (indx == 0)
                limitA = 0;
            else
                limitA = ((centroid - lastCentroid) / 2.0) + lastCentroid;

            if (indx == numClusters - 1)
                limitB = size;
            else
                limitB = ((nextCentroid - centroid) / 2.0) + centroid;

            if (depRow >= limitA && depRow <= limitB &&
                depCol >= limitA && depCol <= limitB)
                dest = clNum;
            else
                dest = numClusters;   /* no cluster: extra penalization */

            cluster[dest] = cost_xrealloc(cluster[dest],
                                          sizeof **cluster * (size_t)(clusterSize[dest] + 1));
            cluster[dest][clusterSize[dest]] = depRow;
            clusterSize[dest] += 1;

            /* evaluate the number of clusters by SSE (sum of squared error);
             * the centroid is truncated to an integer row first */
            centroidRow = clusters[clNum][0];
            dis = depRow - centroidRow;
            sse += dis * dis;
        }

        /* stop once the improvement falls below the THH fraction */
        if (sseBEST != -1 && sseBEST * THH > sseBEST - sse)
            skip = 1;

        printf("SSE for %i clusters : %i \n", numClusters, sse);
        if ((sseBEST > sse || sseBEST == -1) && skip == 0) {
            /* adopt this clustering: release the previous best and take
             * ownership of the freshly built lists */
            for (row = 0; row < bestRows; row++)
                free(clusterBEST[row]);
            free(clusterBEST);
            free(clusterSizeBEST);

            clusterBEST     = cluster;
            clusterSizeBEST = clusterSize;
            bestRows        = numClusters + 1;
            numClustersBEST = numClusters;
            sseBEST         = sse;
            cluster     = NULL;
            clusterSize = NULL;
        }

        free(obj[0]);
        free(obj);
        free(clusters[0]);
        free(clusters);
        free(membership);
        free(sortedCent);
        if (cluster != NULL) {
            for (row = 0; row < numClusters + 1; row++)
                free(cluster[row]);
            free(cluster);
        }
        free(clusterSize);

        if (skip == 1)
            break;
    } /* end of the search for the best k */

    printf("BEST SSE for %i clusters : %i\n", numClustersBEST, sseBEST);

    /* CLUSTERING COST -----------------------------------------------------*/
    /* Dependencies outside any cluster are in the last list, at index
     * numClustersBEST, and pay the full matrix size. */
    for (i = 0; i < numClustersBEST + 1; i++) {
        float clusterCost = 0;

        for (j = 0; j < clusterSizeBEST[i]; j++) {
            float depCost;

            if (isInList(clusterBEST[i][j], buses, busesSize) == 1)
                depCost = 1;
            else if (i != numClustersBEST)
                depCost = pow(clusterSizeBEST[i], LAMD);
            else
                depCost = pow(size, LAMD);
            clusterCost += depCost;
        }
        cost += clusterCost;
    }

    printf("CLUTERING COST: %f\n", cost);
    ans = cost;
    finished = 1;
    printf("Clustering cost calculated. free memory...");

cleanup:
    /* single exit: also reached by the early -1 return */
    for (row = 0; row < bestRows; row++)
        free(clusterBEST[row]);
    free(clusterBEST);
    free(clusterSizeBEST);
    free(buses);
    free(rowCount);
    free(coordsX);
    free(coordsY);
    free(coordsProjected);
    free(coordsProjectedX);
    if (finished)
        printf("done\n");

    return ans;
}
Пример #3
0
/*---< main() >-------------------------------------------------------------*/
/*
 * Command-line driver for one sequential k-means run with optional
 * user-supplied initial centers.
 *
 * Options handled by the switch below:
 *   -i file    input data file (required)
 *   -z file    file with the initial cluster centers (read as non-binary)
 *   -b         the input file is binary
 *   -n num     number of clusters (values <= 1 are rejected via usage())
 *   -t value   threshold forwarded to seq_kmeans() (default 0.001)
 *   -o         print I/O and computation timings at the end
 *   -q         clear the verbose flag passed to file_write()
 *   -d         set the _debug flag
 *   -h         usage()
 * The option string also accepts "p:" and "a"; they have no case of their
 * own and end up in the default branch (usage()).
 *
 * Without -z the first numClusters objects are copied as initial centers.
 */
int main(int argc, char **argv) {
           int     opt;
    extern char   *optarg;
           int     isBinaryFile, is_output_timing, verbose;

           int     numClusters, numCoords, numObjs;
           int    *membership;    /* [numObjs] */
           char   *filename;
           char   *centers_filename;   /* file holding the initial centers */
           float **objects;       /* [numObjs][numCoords] data objects */
           float **clusters;      /* [numClusters][numCoords] cluster center */
           float   threshold;
           double  timing            = 0.0;
           double  io_timing         = 0.0;
           double  clustering_timing = 0.0;
           int     i, j;

    /* some default values */
    _debug           = 0;
    verbose          = 1;
    threshold        = 0.001;
    numClusters      = 0;
    isBinaryFile     = 0;
    is_output_timing = 0;
    filename         = NULL;
    centers_filename = NULL;

    /* getopt() signals the end of the options with -1 (POSIX), not EOF */
    while ( (opt=getopt(argc,argv,"p:i:z:n:t:abdohq")) != -1) {
        switch (opt) {
            case 'i': filename=optarg;
                      break;
            case 'z': centers_filename=optarg;
                      break;
            case 'b': isBinaryFile = 1;
                      break;
            case 't': threshold=atof(optarg);
                      break;
            case 'n': numClusters = atoi(optarg);
                      break;
            case 'o': is_output_timing = 1;
                      break;
            case 'q': verbose = 0;
                      break;
            case 'd': _debug = 1;
                      break;
            case 'h':
            default: usage(argv[0], threshold);
                      break;
        }
    }

    if (filename == NULL || numClusters <= 1) usage(argv[0], threshold);

    if (is_output_timing) io_timing = wtime();

    /* read data points from file ------------------------------------------*/
    objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
    if (objects == NULL) exit(1);

    if (is_output_timing) {
        timing            = wtime();
        io_timing         = timing - io_timing;
        clustering_timing = timing;
    }

    /* start the timer for the core computation -----------------------------*/
    /* membership: the cluster id for each data object */
    membership = malloc((size_t)numObjs * sizeof *membership);
    if (membership == NULL) {
        /* explicit check: an assert() would vanish when built with NDEBUG */
        fprintf(stderr, "Error: cannot allocate membership for %d objects\n",
                numObjs);
        exit(1);
    }

    if (centers_filename != NULL)
    {   /* initial centers come from a file; file_read() allocates them, so
         * nothing is allocated here that would be lost afterwards */
        int num_centers, num_coords;

        clusters = file_read(0, centers_filename, &num_centers, &num_coords);
        if (clusters == NULL) exit(1);

        /* the centers must provide numClusters rows of numCoords values,
         * otherwise later code would read past the end of the buffer */
        if (num_centers < numClusters || num_coords != numCoords) {
            fprintf(stderr,
                    "Error: centers file %s holds %d centers with %d coordinates, "
                    "expected at least %d centers with %d coordinates\n",
                    centers_filename, num_centers, num_coords,
                    numClusters, numCoords);
            exit(1);
        }
    }
    else
    {   /* otherwise, pick the first numClusters elements of objects[] as
         * initial cluster centers */
        if (numClusters > numObjs) {
            fprintf(stderr,
                    "Error: %d clusters requested but only %d objects were read\n",
                    numClusters, numObjs);
            exit(1);
        }

        /* 2D space for the centers: one contiguous data block plus a table
         * of row pointers into it */
        clusters = malloc((size_t)numClusters * sizeof *clusters);
        if (clusters == NULL) {
            fprintf(stderr, "Error: cannot allocate the cluster table\n");
            exit(1);
        }
        clusters[0] = malloc((size_t)numClusters * (size_t)numCoords * sizeof **clusters);
        if (clusters[0] == NULL) {
            fprintf(stderr, "Error: cannot allocate the cluster centers\n");
            exit(1);
        }
        for (i=1; i<numClusters; i++)
            clusters[i] = clusters[i-1] + numCoords;

        for (i=0; i<numClusters; i++)
            for (j=0; j<numCoords; j++)
                clusters[i][j] = objects[i][j];
    }

    /* "clusters" goes in as the initial centers and is replaced by whatever
     * seq_kmeans() returns */
    clusters = seq_kmeans(objects, numCoords, numObjs, numClusters, clusters, threshold, membership);

    /* objects is released as one data block (objects[0]) plus the row table */
    free(objects[0]);
    free(objects);

    if (is_output_timing) {
        timing            = wtime();
        clustering_timing = timing - clustering_timing;
    }

    /* output: the coordinates of the cluster centres ----------------------*/
    file_write(filename, numClusters, numObjs, numCoords, clusters,
               membership, verbose);

    free(membership);
    free(clusters[0]);
    free(clusters);

    /*---- output performance numbers ---------------------------------------*/
    if (is_output_timing) {
        /* the time spent in file_write() counts as I/O as well */
        io_timing += wtime() - timing;
        printf("\nPerforming **** Regular Kmeans (sequential version) ****\n");

        printf("Input file:     %s\n", filename);
        if(centers_filename!=NULL)
        	printf("Centers file:     %s\n", centers_filename);
        printf("numObjs       = %d\n", numObjs);
        printf("numCoords     = %d\n", numCoords);
        printf("numClusters   = %d\n", numClusters);
        printf("threshold     = %.4f\n", threshold);

        printf("I/O time           = %10.4f sec\n", io_timing);
        printf("Computation timing = %10.4f sec\n", clustering_timing);
    }

    return(0);
}