Пример #1
0
/*----< find_nearest_cluster() >---------------------------------------------*/
/*
 * Returns the index of the cluster centre whose distance to `object`, as
 * reported by euclid_dist_2(), is the smallest.
 *
 *   numClusters  number of rows in `clusters`
 *   numCoords    number of coordinates per object / cluster centre
 *   object       the point to classify, [numCoords]
 *   clusters     the cluster centres, [numClusters][numCoords]
 *
 * clusters[0] is always evaluated, so at least one centre must be supplied.
 * Ties keep the lowest index because only a strictly smaller distance
 * replaces the current best.
 */
__inline static
int find_nearest_cluster(int     numClusters, /* no. clusters */
                         int     numCoords,   /* no. coordinates */
                         float  *object,      /* [numCoords] */
                         float **clusters)    /* [numClusters][numCoords] */
{
    int   index, i;
    float dist, min_dist;

    /* start with cluster 0 as the best candidate */
    index    = 0;
    min_dist = euclid_dist_2(numCoords, object, clusters[0]);

    /* examine every remaining centre instead of a fixed count of three,
     * so the search honours numClusters and never indexes past the array */
    for (i = 1; i < numClusters; i++) {
        dist = euclid_dist_2(numCoords, object, clusters[i]);
        if (dist < min_dist) { /* find the min and its array index */
            min_dist = dist;
            index    = i;
        }
    }
    return(index);
}
Пример #2
0
/*----< rms_err(): calculates RMSE of clustering
 * >-------------------------------------*/
/*
 * For every point, looks up the nearest cluster centre with
 * find_nearest_point(), adds the euclid_dist_2() value between the point and
 * that centre to a running sum, and returns sqrt(sum / npoints).
 *
 *   feature          input points, [npoints][nfeatures]
 *   nfeatures        number of features per point
 *   npoints          number of points; the sum is divided by this value, so
 *                    it is expected to be positive
 *   cluster_centres  cluster centres, [nclusters][nfeatures]
 *   nclusters        number of cluster centres
 */
float rms_err(float **feature, /* [npoints][nfeatures] */
              int nfeatures, int npoints,
              float **cluster_centres, /* [nclusters][nfeatures] */
              int nclusters) {
    int i;
    int nearest_cluster_index; /* cluster center id with min distance to pt */
    float sum_euclid = 0.0;    /* sum of Euclidean distance squares */
    float ret;                 /* return value */

/* calculate and sum the square of the euclidean distance.
 * sum_euclid is accumulated by every thread, so it must be a reduction
 * variable; leaving it shared would be a data race on the += below. */
#pragma omp parallel for shared(feature, cluster_centres) firstprivate(        \
    npoints, nfeatures,                                                        \
    nclusters) private(i, nearest_cluster_index)                               \
    reduction(+ : sum_euclid) schedule(static)
    for (i = 0; i < npoints; i++) {
        nearest_cluster_index = find_nearest_point(feature[i], nfeatures,
                                                   cluster_centres, nclusters);

        sum_euclid += euclid_dist_2(
            feature[i], cluster_centres[nearest_cluster_index], nfeatures);
    }
    /* divide by n, then take sqrt */
    ret = sqrt(sum_euclid / npoints);

    return (ret);
}
Пример #3
0
/*
 * Thread entry point: assigns each point of this thread's slice to its
 * nearest cluster centre and adds the point to that cluster's running sum.
 *
 *   arg  the thread's slice number, passed as a long cast to void*
 *
 * The slice is [offset * cluster_len, offset * cluster_len + cluster_len);
 * the thread with number NUM_THREADS - 1 additionally takes the data1.rem
 * trailing points.  For each point the function writes data1.membership[i],
 * increments data1.new_centers_len[nearest] and adds the point's features to
 * data1.new_centers[nearest].  The function ends the thread with
 * pthread_exit(NULL) and therefore never returns to its caller.
 *
 * NOTE(review): new_centers_len / new_centers live in the shared data1
 * object and are updated here without a lock; if several threads run this
 * concurrently those updates can collide -- confirm how the caller
 * synchronises or gives each thread its own accumulators.
 */
void *find_nearest_point(void *arg)
{
    int  i, j, l, start, end, len;
    int  nearest;
    long offset;

    offset = (long)arg;
    len    = data1.cluster_len;
    start  = offset * len;
    if (offset == (NUM_THREADS - 1))
        end = start + len + data1.rem;   /* last thread takes the remainder */
    else
        end = start + len;

    /* find the cluster center id with min distance to a point */
    for (i = start; i < end; i++) {
        float min_dist = FLT_MAX;

        nearest = -1;
        for (j = 0; j < data1.nclusters; j++) {
            float dist;
            dist = euclid_dist_2(data1.feature[i], data1.clusters[j], data1.nfeatures);  /* no need square root */
            if (dist < min_dist) {
                min_dist = dist;
                nearest  = j;
            }
        }

        /* no centre compared below FLT_MAX (e.g. nclusters == 0): leave the
         * point untouched */
        if (nearest < 0)
            continue;

        /* Accumulate only once the search is finished.  Doing this inside
         * the comparison above would count the point towards every cluster
         * that was the running minimum at some moment, not just the final
         * nearest one. */
        data1.membership[i] = nearest;
        data1.new_centers_len[nearest]++;
        for (l = 0; l < data1.nfeatures; l++)
            data1.new_centers[nearest][l] += data1.feature[i][l];
    }
    pthread_exit(NULL);
}
/*
 *	Function: find_nearest_cluster
 *	------------------------------
 *	Scans the cluster centers and reports the index of the one for which
 *	euclid_dist_2() yields the smallest value against the given object.
 *	Center 0 is the initial candidate; a later center replaces it only when
 *	its distance is strictly smaller.
 */
__inline static int find_nearest_cluster(int numClusters, int numCoords, double *object, double **clusters) {
    int    best = 0;
    int    k    = 1;
    double best_dist = euclid_dist_2(numCoords, object, clusters[0]);

    while (k < numClusters) {
        /* distances are compared as returned; no square root is taken */
        double cand = euclid_dist_2(numCoords, object, clusters[k]);

        if (cand < best_dist) {
            best_dist = cand;
            best      = k;
        }
        k++;
    }
    return best;
}
Пример #5
0
/*----< find_nearest_cluster() >---------------------------------------------*/
/* Returns the index of the entry in clusters[] for which euclid_dist_2()
 * gives the smallest value against object.  Entry 0 is the starting
 * candidate and is only displaced by a strictly smaller distance. */
__inline static
int find_nearest_cluster(int     numClusters,
                         int     numCoords,
                         float  *object,      /* [numCoords] */
                         float **clusters)    /* [numClusters][numCoords] */
{
    int   nearest = 0;
    int   c;
    float nearest_d2 = euclid_dist_2(numCoords, object, clusters[0]);

    for (c = 1; c < numClusters; ++c) {
        /* compared without taking a square root */
        const float d2 = euclid_dist_2(numCoords, object, clusters[c]);

        if (!(d2 < nearest_d2))
            continue;            /* not an improvement */

        nearest_d2 = d2;
        nearest    = c;
    }
    return nearest;
}
Пример #6
0
// ===========================================================================
// ===========================================================================
// Returns the index of the cluster whose euclid_dist_2() value against
// `object` is smallest. `clusters` is a flat array in which cluster i starts
// at offset i * COORDS. Cluster 0 is the initial candidate; a later cluster
// wins only with a strictly smaller distance.
static inline int find_nearest_cluster(int num_clusters, float *object, 
                                       float *clusters) {
  int   best = 0;
  int   c = 1;
  float best_d2 = euclid_dist_2(object, clusters);

  while (c < num_clusters) {
    // Distances are compared as returned; no square root is taken
    const float d2 = euclid_dist_2(object, clusters + c * COORDS);

    if (d2 < best_d2) {
      best_d2 = d2;
      best = c;
    }
    ++c;
  }

  return best;
}
Пример #7
0
/*----< find_nearest_point() >-----------------------------------------------*/
/*
 * Returns the index of the row of `pts` for which euclid_dist_2() gives the
 * smallest value against `pt`.
 *
 *   pt         the query point, [nfeatures]
 *   nfeatures  number of features per point
 *   pts        candidate points, [npts][nfeatures]
 *   npts       number of candidate points
 *
 * Returns 0 when no candidate compares below FLT_MAX, which includes
 * npts <= 0; previously the result was an uninitialised value in that case.
 */
__inline int find_nearest_point(float *pt, /* [nfeatures] */
                                int nfeatures,
                                float **pts, /* [npts][nfeatures] */
                                int npts) {
    int index = 0; /* defined result even if the loop never updates it */
    int i;
    float min_dist = FLT_MAX;

    /* find the cluster center id with min distance to pt */
    for (i = 0; i < npts; i++) {
        float dist;
        dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
        if (dist < min_dist) {
            min_dist = dist;
            index = i;
        }
    }
    return (index);
}
Пример #8
0
/*
 * Thread entry point: adds, for every point of this thread's slice, the
 * euclid_dist_2() value between the point and the cluster centre it is
 * assigned to (data1.membership[i]) into data1.total_sum.
 *
 *   arg  the thread's slice number, passed as a long cast to void*
 *
 * The slice is [offset * cluster_len, offset * cluster_len + cluster_len);
 * the thread with number NUM_THREADS - 1 additionally takes the data1.rem
 * trailing points.  The function ends the thread with pthread_exit(NULL)
 * and therefore never returns to its caller.
 */
void *calc_distance(void *arg)
{
    int    i, start, end, len;
    double partial = 0.0;   /* this thread's contribution to total_sum */
    long   offset;

    offset = (long)arg;
    len    = data1.cluster_len;
    start  = offset * len;
    if (offset == (NUM_THREADS - 1))
        end = start + len + data1.rem;   /* last thread takes the remainder */
    else
        end = start + len;

    /* Sum locally first so the mutex is taken once per thread rather than
     * once per point, which keeps the threads from serialising on mut1. */
    for (i = start; i < end; i++)
        partial += euclid_dist_2(data1.feature[i], data1.clusters[data1.membership[i]], data1.nfeatures);  /* no need square root */

    pthread_mutex_lock(&mut1);
    data1.total_sum += partial;
    pthread_mutex_unlock(&mut1);

    pthread_exit(NULL);
}
Пример #9
0
/*----< find_nearest_cluster() >---------------------------------------------*/
/*
 * Returns the index of the cluster centre whose distance to `object`, as
 * reported by euclid_dist_2(), is the smallest.
 *
 *   numClusters  number of rows in `clusters`
 *   numCoords    number of coordinates per object / cluster centre
 *   object       the point to classify, [numCoords]
 *   clusters     the cluster centres, [numClusters][numCoords]
 *
 * The search over clusters 1..numClusters-1 is active: with it disabled the
 * function returned 0 for every object, regardless of the centres.
 * clusters[0] is always evaluated, so at least one centre must be supplied.
 */
__inline static
int find_nearest_cluster(int     numClusters, /* no. clusters */
                         int     numCoords,   /* no. coordinates */
                         float  *object,      /* [numCoords] */
                         float **clusters)    /* [numClusters][numCoords] */
{
    int   index, i;
    float dist, min_dist;

    /* find the cluster id that has min distance to object */
    index    = 0;
    min_dist = euclid_dist_2(numCoords, object, clusters[0]);

    for (i=1; i<numClusters; i++) {
        dist = euclid_dist_2(numCoords, object, clusters[i]);
        /* no need square root */
        if (dist < min_dist) { /* find the min and its array index */
            min_dist = dist;
            index    = i;
        }
    }
    return(index);
}
Пример #10
0
/*
 * Runs k-means over the objects held by this MPI process, cooperating with
 * the other processes of `comm`.
 *
 * Each iteration assigns every local object to its nearest cluster centre
 * (find_nearest_cluster), sums the per-cluster coordinates and counts across
 * all processes with MPI_Allreduce, and replaces each centre by the mean of
 * its members.  Iteration stops when no centre moved by more than 0.04
 * (measured with euclid_dist_2 over all numCoords coordinates) or after the
 * loop counter reaches 10000.
 *
 *   objects      in:  local objects, [numObjs][numCoords]
 *   numCoords    number of coordinates per object
 *   numObjs      number of local objects
 *   numClusters  number of clusters
 *   threshold    only echoed in the debug output; it does not take part in
 *                the stopping test
 *   membership   out: [numObjs], cluster index assigned to each object
 *   clusters     in/out: [numClusters][numCoords]; initial centres on entry,
 *                final centres on return.  clusters[0] is used as one
 *                buffer of numClusters*numCoords floats in MPI_Allreduce,
 *                so the rows must be stored contiguously.
 *   comm         MPI communicator
 *
 * Always returns 1.
 */
int mpi_kmeans(float    **objects,     /* in: [numObjs][numCoords] */
               int        numCoords,   
               int        numObjs,     
               int        numClusters,  
               float      threshold,   /* % objects change membership */
               int       *membership,  /* out: [numObjs] membership of points with parent cluster */
               float    **clusters,    /* out: [numClusters][numCoords] */
               MPI_Comm   comm)        /* MPI communicator */
{
    int      i, j, k, index, loop=0, total_numObjs, done=1;
    int      rank = 0;       /* only queried and used when _debug is set */
    int     *newClusterSize; /* [numClusters]: no. objects assigned in each
                                new cluster */
    int     *clusterSize;    /* [numClusters]: temp buffer for Allreduce */
    float    no_of_changes;          /* % of objects change their clusters */
    float    no_of_changes_tmp;
    float  **newClusters;    /* [numClusters][numCoords] */
    float   *prevClusters;   /* [numClusters*numCoords]: centres before the
                                current update, row-major */
    float    result = 0.0;
    const double move_tolerance = 0.04; /* largest centre movement that still
                                           counts as converged */
    extern int _debug;

    if (_debug) MPI_Comm_rank(comm, &rank);

    /* initialize membership[] */
    for (i=0; i<numObjs; i++) membership[i] = -1;

    /* need to initialize newClusterSize and newClusters[0] to all 0 */
    newClusterSize = (int*) calloc(numClusters, sizeof(int));
    assert(newClusterSize != NULL);
    clusterSize    = (int*) calloc(numClusters, sizeof(int));
    assert(clusterSize != NULL);

    newClusters    = (float**) malloc(numClusters *            sizeof(float*));
    assert(newClusters != NULL);
    newClusters[0] = (float*)  calloc(numClusters * numCoords, sizeof(float));
    assert(newClusters[0] != NULL);
    for (i=1; i<numClusters; i++)
        newClusters[i] = newClusters[i-1] + numCoords;

    /* One snapshot buffer reused by every iteration.  Allocating it per
     * iteration leaked the row buffers, and sizing the copy for a fixed
     * 5 clusters x 2 coordinates overran the arrays for smaller inputs. */
    prevClusters   = (float*)  calloc(numClusters * numCoords, sizeof(float));
    assert(prevClusters != NULL);

    MPI_Allreduce(&numObjs, &total_numObjs, 1, MPI_INT, MPI_SUM, comm);
    if (_debug) printf("%2d: numObjs=%d total_numObjs=%d numClusters=%d numCoords=%d\n",rank,numObjs,total_numObjs,numClusters,numCoords);

    do {
        double curT = MPI_Wtime();

        done = 1;
        no_of_changes = 0.0;
        for (i=0; i<numObjs; i++) {
            /* find the array index of nearest cluster center */
            index = find_nearest_cluster(numClusters, numCoords, objects[i],
                                         clusters);

            /* if membership changes, increase no_of_changes by 1 */
            if (membership[i] != index) no_of_changes += 1.0;

            /* assign the membership to object i */
            membership[i] = index;

            /* update new cluster centers : sum of objects located within */
            newClusterSize[index]++;
            for (j=0; j<numCoords; j++)
                newClusters[index][j] += objects[i][j];
        }

        /* remember the current centres so their movement can be measured
         * after the update below overwrites clusters[][] */
        for (i=0; i<numClusters; i++)
            for (j=0; j<numCoords; j++)
                prevClusters[i * numCoords + j] = clusters[i][j];

        /* sum all data objects in newClusters */
        MPI_Allreduce(newClusters[0], clusters[0], numClusters*numCoords,
                      MPI_FLOAT, MPI_SUM, comm);
        MPI_Allreduce(newClusterSize, clusterSize, numClusters, MPI_INT,
                      MPI_SUM, comm);

        /* average the sum and replace old cluster centers with newClusters */
        for (i=0; i<numClusters; i++) {
            for (j=0; j<numCoords; j++) {
                if (clusterSize[i] > 1)
                    clusters[i][j] /= clusterSize[i];
                newClusters[i][j] = 0.0;   /* set back to 0 */
            }
            newClusterSize[i] = 0;   /* set back to 0 */
        }

        /* converged only if every centre stayed within the tolerance; stop
         * scanning at the first centre that moved further */
        for (k=0; k<numClusters && done == 1; k++) {
            result = euclid_dist_2(numCoords, prevClusters + k * numCoords,
                                   clusters[k]);
            if (result > move_tolerance)
                done = 0;
        }

        MPI_Allreduce(&no_of_changes, &no_of_changes_tmp, 1, MPI_FLOAT, MPI_SUM, comm);
        no_of_changes = no_of_changes_tmp / total_numObjs;

        if (_debug) {
            double maxTime;
            curT = MPI_Wtime() - curT;
            MPI_Reduce(&curT, &maxTime, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
            /* report the slowest process' time, which is what the reduction
             * delivers to rank 0 */
            if (rank == 0) printf("%2d: loop=%d time=%f sec\n",rank,loop,maxTime);
        }
    } while ((done == 0) && (loop++ < 10000));

    if (_debug && rank == 0) printf("%2d: no_of_changes=%f threshold=%f loop=%d\n",rank,no_of_changes,threshold,loop);

    free(prevClusters);
    free(newClusters[0]);
    free(newClusters);
    free(newClusterSize);
    free(clusterSize);

    return 1;
}