/*---< main() >-------------------------------------------------------------*/
/*
 * Command-line driver for the sequential k-means run.
 *
 * Options handled here:
 *   -i <file>  input data file            -b  input file is binary
 *   -n <k>     number of clusters         -t <threshold>
 *   -o         print timing report        -d  turn on _debug
 * Any other option letter calls usage().  The input file and a cluster
 * count above 1 are required; usage() is called when either is missing.
 *
 * Returns 0 after writing the result via file_write(); exits with status 1
 * when file_read() returns NULL.
 */
int main(int argc, char **argv) {
    extern char *optarg;
    extern int   optind;

    char   *inputPath    = NULL;    /* -i */
    int     binaryInput  = 0;       /* -b */
    int     reportTiming = 0;       /* -o */
    int     numClusters  = 0;       /* -n */
    float   threshold    = 0.001;   /* -t */
    int     numObjs, numCoords;
    int    *membership;             /* [numObjs] cluster id per object */
    float **objects;                /* [numObjs][numCoords] data objects */
    float **clusters;               /* [numClusters][numCoords] centers */
    double  timing, io_timing, clustering_timing;
    int     flag;

    _debug = 0;

    /* parse the command line ------------------------------------------------*/
    for (;;) {
        flag = getopt(argc, argv, "p:i:n:t:abdo");
        if (flag == EOF)
            break;

        if (flag == 'i')
            inputPath = optarg;
        else if (flag == 'b')
            binaryInput = 1;
        else if (flag == 't')
            threshold = atof(optarg);
        else if (flag == 'n')
            numClusters = atoi(optarg);
        else if (flag == 'o')
            reportTiming = 1;
        else if (flag == 'd')
            _debug = 1;
        else
            usage(argv[0], threshold);  /* '?' and every unhandled letter */
    }

    if (inputPath == 0 || numClusters <= 1)
        usage(argv[0], threshold);

    if (reportTiming)
        io_timing = wtime();

    /* read data points from file -------------------------------------------*/
    objects = file_read(binaryInput, inputPath, &numObjs, &numCoords);
    if (objects == NULL)
        exit(1);

    if (reportTiming) {
        /* input is done: close the I/O interval, open the clustering one */
        timing            = wtime();
        io_timing         = timing - io_timing;
        clustering_timing = timing;
    }

    /* core computation -----------------------------------------------------*/
    membership = (int*) malloc(numObjs * sizeof(int));
    assert(membership != NULL);

    clusters = seq_kmeans(objects, numCoords, numObjs, numClusters,
                          threshold, membership);

    /* objects[] is released as one contiguous block plus the row table */
    free(objects[0]);
    free(objects);

    if (reportTiming) {
        timing            = wtime();
        clustering_timing = timing - clustering_timing;
    }

    /* output: the coordinates of the cluster centres -----------------------*/
    file_write(inputPath, numClusters, numObjs, numCoords, clusters,
               membership);

    free(membership);
    free(clusters[0]);
    free(clusters);

    /* output performance numbers -------------------------------------------*/
    if (reportTiming) {
        /* the time spent writing the result counts as I/O as well */
        io_timing += wtime() - timing;
        printf("\nPerforming **** Regular Kmeans (sequential version) ****\n");

        printf("Input file: %s\n", inputPath);
        printf("numObjs = %d\n", numObjs);
        printf("numCoords = %d\n", numCoords);
        printf("numClusters = %d\n", numClusters);
        printf("threshold = %.4f\n", threshold);

        printf("I/O time = %10.4f sec\n", io_timing);
        printf("Computation timing = %10.4f sec\n", clustering_timing);
    }

    return(0);
}
float calculate_clustering_cost(int **InputDsm, int size) { float ans = -1.0; int **dsm; dsm = (int**) malloc(sizeof(int*)*size); int row; for(row=0;row<size;row++){ dsm[row] = (int*)malloc(sizeof(int)*size); } //dsm = InputDsm; //memcpy(dsm, InputDsm,sizeof(int)*size*size); //Copy input dsm to dsm int i,j; for(i=0;i<size;i++){ for(j=0;j<size;j++){ dsm[i][j]=InputDsm[i][j]; //printf("%i,",dsm[i][j]); } //printf("\n"); } int *buses; buses = malloc(sizeof(int)); int busesSize=0; int *coordsX; int *coordsY; coordsX= malloc(sizeof(int)); coordsY= malloc(sizeof(int)); int coordsSize=0; int *coordsProjected; coordsProjected = malloc(sizeof(int)); int *coordsProjectedX; coordsProjectedX = malloc(sizeof(int)); int coordsProjectedSize=0; //STEP 1. GET VERTICAL BUSES //calculateVerticalBuses( dsm, &size, ALPHA, &buses, &busesSize, &coordsX, &coordsY, &coordsSize, &coordsProjected, &coordsProjectedSize); int colCount; float x,y,z,w; //Calibration with different alpha values int al[7] = {40,35,30,25,20,15,10}; //int al[7] = {10,15,20,25,30,35,40}; double delta = (size*15)/100; int aa,exit=0,last=0,lastt=0; for(aa=0;aa<7;aa++) { int alpha = al[aa]; for(i=0; i<size; i++) { if(aa==0) { colCount=0; for(j=0;j<size;j++){ if(dsm[i][j] != 0){ colCount +=1; coordsX = realloc(coordsX,sizeof(int)*(coordsSize+1)); coordsY = realloc(coordsY,sizeof(int)*(coordsSize+1)); coordsX[coordsSize]=i; coordsY[coordsSize]=j; coordsSize+=1; coordsProjected = realloc(coordsProjected, sizeof(int)*(coordsProjectedSize+1)); coordsProjected[coordsProjectedSize] = i; coordsProjectedX = realloc(coordsProjectedX, sizeof(int)*(coordsProjectedSize+1)); coordsProjectedX[coordsProjectedSize] = j; coordsProjectedSize+=1; } } } if(colCount!=0){ y = (float)colCount; z = (float)size; w = (float) 100/z; x = (float)w*y; if(x>alpha){ //printf("busesSize:%i\n",busesSize); buses = realloc(buses,sizeof(int)*(busesSize+1)); buses[busesSize]=i; busesSize+=1; } y,z,w,x=0; } } printf("num buses: %i with alpha :%i last:%i\n", 
busesSize,alpha,last); //last posbility - stay with this result from aa==7 if(exit==1 || aa==7) break; else if((busesSize)>delta) { //With biggest alpha we have already a significant amount of buses if(aa==0) break; //Go back and recalculate last buses resulta based on alpha aa-1 aa-=2; exit=1; } else last=busesSize; //RESTART ALL VALUES busesSize=0; buses = realloc(buses,sizeof(int)); } //STEP 2. REMOVE DEPENDENCIES RELATED TO BUSES int deletes=0; //int i; for(i=0;i<coordsSize;i++){ //printf("x 0: %i \n", coordsX[i]); if((int)isInList(coordsX[i], buses, busesSize)==1) { //printf("entro\n"); remove_element(coordsProjected, coordsProjectedSize, i-deletes); coordsProjectedSize-=1; deletes+=1; } } printf("num buses: %i\n", busesSize); printf("num coords: %i\n", coordsSize); //printf("bus 0: %i \n", buses[0]); //printArray(buses,busesSize); //printArray(coordsX,coordsSize); //printArray(coordsY,coordsSize); //printArray(coordsProjected,coordsProjectedSize); //printArray(coordsProjectedX,coordsProjectedSize); //STEP 3. FIND CLUSTERS FROM DEPENDENCIES int **clusterBEST; int *clusterSizeBEST; int greater=0; clusterSizeBEST = (int*)malloc(sizeof(int)); clusterBEST =(int**)malloc(sizeof(int*)); int sseBEST=-1; int numClustersBEST=0; int numClusters; float **obj; float **clusters; int **cluster; int *clusterSize; int *membership; float *sortedCent; int ks[8]={2,3,4,6,8,12,16,20}; int k; for(k=0;k<8;k++) { numClusters = ks[k]; if(numClusters>coordsProjectedSize){ return -1; } obj = malloc(coordsProjectedSize*sizeof(float*)); int rr; for(rr=0;rr<coordsProjectedSize;rr++){ obj[rr]= malloc(sizeof(float)); obj[rr][0]=coordsProjected[rr]; } membership = (int*)malloc(coordsProjectedSize*sizeof(int)); clusters = seq_kmeans(obj, 1 ,coordsProjectedSize, numClusters, 0.001 , membership); //STEP 4. 
SORT ASCENDING THE CENTROIDS int sortSize=0; sortedCent = malloc(numClusters*sizeof(float)); for(i=0;i<numClusters;i++) sortedCent[i]=-1; int ult =-1; for(i=0;i<numClusters;i++){ if(ult!=-1){ for(j=0;j<sortSize;j++){ if(clusters[i][0]<sortedCent[j]){ add_element(sortedCent, sortSize, (float) clusters[i][0], j); //sortedCent[j]=clusters[i][0]; sortSize+=1; ult= i; break; } } if(ult!=i) { add_element(sortedCent, sortSize, clusters[i][0], sortSize); //sortedCent[sortSize]=clusters[i][0]; sortSize+=1; } } else{ add_element(sortedCent, sortSize, clusters[i][0], 0); //sortedCent[sortSize]=clusters[i][0]; sortSize+=1; ult=i; } } /* printf("centroid 0 %f\n", clusters[0][0]); printf("centroid 1 %f\n", clusters[1][0]); printf("centroid 2 %f\n", clusters[2][0]); printf("sort centroid 0 %f\n", sortedCent[0]); printf("sort centroid 1 %f\n", sortedCent[1]); printf("sort centroid 2 %f\n", sortedCent[2]); */ //Asign X,Y dependency coordinates to each identify cluster cluster = (int**)malloc((numClusters+1)*sizeof(int*)); clusterSize = (int*)malloc((numClusters+1)*sizeof(int)); for(row=0;row<numClusters+1;row++) { cluster[row]=(int*)malloc(sizeof(int)); clusterSize[row]=0; } float limitA, limitB; int sse=0; for(i=0;i<coordsProjectedSize;i++) { //CHECK first if dependency is not outside the cluster extricly in the diagonal of the dsm int clNum = membership[i]; float cc = clusters[clNum][0]; int indx; indx = getIndexCluster(sortedCent, numClusters , cc); float lastCentroid=0,centroid=0,nextCentroid=0; //printArrayFloat(sortedCent,numClusters); centroid=cc; if(indx!=0 && indx!=-1) lastCentroid = sortedCent[indx-1]; else lastCentroid=0.0; if(indx!=numClusters-1 && indx!=-1) nextCentroid = sortedCent[indx+1]; else nextCentroid=size; //printf("lastC:%f cent:%f next:%f\n",lastCentroid,centroid,nextCentroid); if(lastCentroid>centroid) printf("********LIST NOT ORDENED!!!"); if(indx==0) limitA=0; else limitA = ((centroid-lastCentroid)/2.0)+lastCentroid ; if(indx==numClusters-1) limitB=size; 
else limitB = ((nextCentroid-centroid)/2.0)+centroid; int y = coordsProjected[i]; int x = coordsProjectedX[i]; //printf("y:%i x:%i limitA:%f limitB:%f\n",y, x,limitA, limitB); //printf("limA:%d limB:%d coordY:%i coordX:%i\n", limitA,limitB, coordsProjected[i],coordsProjectedX[i]); if(y>=limitA && y<=limitB && x>=limitA && x<=limitB) { cluster[membership[i]]=realloc(cluster[membership[i]], sizeof(int)*(clusterSize[membership[i]]+1)); cluster[membership[i]][clusterSize[membership[i]]]=coordsProjected[i]; clusterSize[membership[i]]+=1; } else { //Dependency has no cluster, added to las cluster list for extra penatilization //printf("no in cluster really\n"); cluster[numClusters]=realloc(cluster[numClusters], sizeof(int)*(clusterSize[numClusters]+1)); cluster[numClusters][clusterSize[numClusters]]=coordsProjected[i]; clusterSize[numClusters]+=1; } //Evaluate number of clusters by SSE (sum of squared error) int cl = membership[i]; int yCl = clusters[cl][0]; int dis = abs(coordsProjected[i]-yCl); sse+=pow(dis,2); //printf("cluster num:%i with size:%i - dep in y=%i centroid in:%i dis:%i sse:%i\n", membership[i],clusterSize[membership[i]], coordsProjected[i],yCl,dis,sse ); } int skip =0; //LOCK FOR THREASHOLD ON CHANGE LESS THAN 20% if(sseBEST!=-1 && sseBEST*THH>sseBEST-sse) { //break; skip=1; } printf("SSE for %i clusters : %i \n", numClusters, sse); if((sseBEST>sse || sseBEST==-1) && skip==0){ //free last cluster info first if(sseBEST!=-1){ int row; for(row=0;row<greater+1;row++) { free(clusterBEST[row]); } } numClustersBEST = numClusters; clusterSizeBEST = (int*)realloc(clusterSizeBEST,(numClusters+1)*sizeof(int)); clusterBEST =(int**)realloc(clusterBEST,(numClusters+1)*sizeof(int*)); int r,q; for(r=0;r<numClusters+1;r++) { int ct; if(r!=numClusters) ct = membership[r]; else ct = numClusters; int g = clusterSize[r]; greater = numClusters; //printf("size cluster r:%i is :%i\n",r,g); clusterBEST[r]= malloc(g*sizeof(int)); clusterSizeBEST[r]=g; for(q=0;q<g;q++) { 
clusterBEST[r][q] = cluster[r][q]; } } sseBEST=sse; } for(row=0;row<coordsProjectedSize;row++) { free(obj[row]); } free(obj); free(clusters[0]); free(clusters); free(membership); for(row=0;row<numClusters+1;row++) { free(cluster[row]); } free(cluster); free(clusterSize); free(sortedCent); if(skip==1) break; }//End loop to find best k for kmenas with sse printf("BEST SSE for %i clusters : %i\n", numClustersBEST, sseBEST); //CLUSTERING COST //IMPORTANT! DEPENDENCEIS OUTSIDE ANY CLUSTER ARE IN cluster[NumCluster], that is in last "cluster" float cost =0; float clusterCost; float depCost; for(i=0;i<numClustersBEST+1;i++) { clusterCost=0; for(j=0;j<clusterSizeBEST[i];j++) { depCost=0; if(isInList(clusterBEST[i][j],buses,busesSize)==1) depCost =1; else { //cluster[i][j] show the j depdendency, based on the coordsX,Y numeration from cluster number i if((isInList(clusterBEST[i][j],clusterBEST[i],clusterSizeBEST[i]==1)) && i!=(numClustersBEST+1)) depCost = pow(clusterSizeBEST[i],LAMD); else depCost = pow(size, LAMD); } clusterCost+=depCost; } cost+=clusterCost; } printf("CLUTERING COST: %f\n", cost); ans = cost; //printf("numClusters best free :%i\n",numClustersBEST); for(row=0;row<numClustersBEST+1;row++) { free(clusterBEST[row]); } free(clusterBEST); free(clusterSizeBEST); printf("Clustering cost calculated. free memory..."); free(buses); free(coordsX); free(coordsY); free(coordsProjected); free(coordsProjectedX); for(row=0;row<size;row++) { free(dsm[row]); } free(dsm); printf("done\n"); return ans; }
/*---< main() >-------------------------------------------------------------*/ int main(int argc, char **argv) { int opt; extern char *optarg; extern int optind; int isBinaryFile, is_output_timing, verbose; int numClusters, numCoords, numObjs; int *membership; /* [numObjs] */ char *filename; // TODO >> modified by VL: new variable representing the centers file name char *centers_filename; // TODO << end of the modification float **objects; /* [numObjs][numCoords] data objects */ float **clusters; /* [numClusters][numCoords] cluster center */ float threshold; double timing, io_timing, clustering_timing; /* some default values */ _debug = 0; verbose = 1; threshold = 0.001; numClusters = 0; isBinaryFile = 0; is_output_timing = 0; filename = NULL; // TODO >> modified by VL: initialization of the new variable centers_filename = NULL; // TODO << end of the modification // TODO >> modified by VL: added the letter z while ( (opt=getopt(argc,argv,"p:i:z:n:t:abdohq"))!= EOF) { // TODO << end of the modification switch (opt) { case 'i': filename=optarg; break; // TODO >> modified by VL: initialize centers filename case 'z': centers_filename=optarg; break; // TODO << end of the modification case 'b': isBinaryFile = 1; break; case 't': threshold=atof(optarg); break; case 'n': numClusters = atoi(optarg); break; case 'o': is_output_timing = 1; break; case 'q': verbose = 0; break; case 'd': _debug = 1; break; case 'h': default: usage(argv[0], threshold); break; } } if (filename == 0 || numClusters <= 1) usage(argv[0], threshold); if (is_output_timing) io_timing = wtime(); /* read data points from file ------------------------------------------*/ objects = file_read(isBinaryFile, filename, &numObjs, &numCoords); if (objects == NULL) exit(1); if (is_output_timing) { timing = wtime(); io_timing = timing - io_timing; clustering_timing = timing; } /* start the timer for the core computation -----------------------------*/ /* membership: the cluster id for each data object */ membership 
= (int*) malloc(numObjs * sizeof(int)); assert(membership != NULL); // TODO >> modified by VL: initialize variable "clusters" /* allocate a 2D space for returning variable clusters[] (coordinates of cluster centers) */ clusters = (float**) malloc(numClusters * sizeof(float*)); assert(clusters != NULL); clusters[0] = (float*) malloc(numClusters * numCoords * sizeof(float)); assert(clusters[0] != NULL); int i,j; for (i=1; i<numClusters; i++) clusters[i] = clusters[i-1] + numCoords; // possibly load the centers from a file if (centers_filename != NULL) { int num_centers, num_coords; clusters = file_read(0, centers_filename, &num_centers, &num_coords); // no control over the numbers of centers and coordinates } // otherwise, pick the first numClusters elements of objects[] as initial cluster centers else { for (i=0; i<numClusters; i++) for (j=0; j<numCoords; j++) clusters[i][j] = objects[i][j]; } // TODO << end of the modification // TODO >> modified by VL: added "clusters" as a parameter representing the initial centers clusters = seq_kmeans(objects, numCoords, numObjs, numClusters, clusters, threshold, membership); // TODO << end of the modification free(objects[0]); free(objects); if (is_output_timing) { timing = wtime(); clustering_timing = timing - clustering_timing; } /* output: the coordinates of the cluster centres ----------------------*/ file_write(filename, numClusters, numObjs, numCoords, clusters, membership, verbose); free(membership); free(clusters[0]); free(clusters); /*---- output performance numbers ---------------------------------------*/ if (is_output_timing) { io_timing += wtime() - timing; printf("\nPerforming **** Regular Kmeans (sequential version) ****\n"); printf("Input file: %s\n", filename); // TODO >> modified by VL: display centers filename if(centers_filename!=NULL) printf("Centers file: %s\n", centers_filename); // TODO << end of the modifications printf("numObjs = %d\n", numObjs); printf("numCoords = %d\n", numCoords); 
printf("numClusters = %d\n", numClusters); printf("threshold = %.4f\n", threshold); printf("I/O time = %10.4f sec\n", io_timing); printf("Computation timing = %10.4f sec\n", clustering_timing); } return(0); }