// =========================================================================== // =========================================================================== void kmeans_mpi_do_tile(float *objects, int *membership, float *partial_clusters, int *partial_sizes, float *clusters, int objects_per_tile, int num_clusters) { int i; int j; //printf("%d: do_tile 0x%X 0x%X 0x%X 0x%X 0x%X\r\n", sys_get_worker_id(), objects, membership, partial_clusters, partial_sizes, clusters); // Clear partial arrays for (i = 0; i < num_clusters; i++) { partial_sizes[i] = 0; for (j = 0; j < COORDS; j++) { partial_clusters[i * COORDS + j] = 0.0F; } } // Cluster for (i = 0; i < objects_per_tile; i++) { membership[i] = find_nearest_cluster(num_clusters, objects + i * COORDS, clusters); partial_sizes[membership[i]]++; for (j = 0; j < COORDS; j++) { partial_clusters[membership[i] * COORDS + j] += objects[i * COORDS + j]; } } }
void work(struct input *x){ int tid = x->tid; double local_delta=0; for (int i = tid; i < x->numObjs; i += nthreads) { /* find the array index of nearest cluster center */ int index = find_nearest_cluster(x->numClusters, x->numCoords, x->objects[i], x->clusters); /* if membership changes, increase delta by 1 */ if (x->membership[i] != index) local_delta += 1.0; /* assign the membership to object i */ x->membership[i] = index; /* update new cluster centers : sum of all objects located within (average will be performed later) */ x->local_newClusterSize[tid][index]++; for (int j=0; j < x->numCoords; j++) x->local_newClusters[tid][index][j] += x->objects[i][j]; } pthread_mutex_lock(&lock1); delta +=local_delta; pthread_mutex_unlock(&lock1); }
/// Re-labels every sample with the cluster returned by find_nearest_cluster().
///
/// labels_[i] is overwritten only when the returned cluster differs from the
/// stored one.
///
/// @param samples  samples to label; labels_ is indexed in the same order.
/// @return number of samples whose label changed.
int assign_nearest_cluster( const std::vector<sample_type>& samples)
{
    int num_swaps = 0;

    // Index with the container's unsigned size type: the previous `int`
    // counter mixed signed/unsigned in the comparison and could overflow
    // for very large inputs.
    const std::size_t count = samples.size();
    for( std::size_t i = 0; i < count; ++i)
    {
        // Second argument of find_nearest_cluster(); its value is not used here.
        int nearest;
        const cluster_t *cl = find_nearest_cluster( samples[i], nearest);

        if( labels_[i] != cl)
        {
            labels_[i] = cl;
            ++num_swaps;
        }
    }

    return num_swaps;
}
void cluster_builder::once(std::vector<cv::Point2f> &pts, std::vector<cluster_builder::Cluster> &clusters, double threshold) { for (std::vector<cv::Point2f>::iterator it = pts.begin(); it != pts.end();) { double dis; std::vector<Cluster>::iterator it_cluster = find_nearest_cluster(*it, clusters, dis); if (it_cluster == clusters.end()) { Cluster c; c.id = next_cid_++; c.mean_pt = *it; c.pts.push_back(*it); clusters.push_back(c); it = pts.erase(it); } else { if (dis < threshold) { it_cluster->pts.push_back(*it); it_cluster->calc_mean(); it = pts.erase(it); } else if (dis > 2 * threshold) { Cluster c; c.id = next_cid_++; c.mean_pt = *it; c.pts.push_back(*it); clusters.push_back(c); it = pts.erase(it); } else { ++it; // 保留,下次迭代 ... } } } }
/*
 * K-means over objects distributed across the ranks of `comm`.
 *
 * Each pass assigns every local object to the cluster returned by
 * find_nearest_cluster(), sums the per-cluster coordinates and counts across
 * all ranks with MPI_Allreduce, and replaces `clusters` with the averaged
 * sums.
 *
 * Termination: the loop ends when none of the first min(numClusters, 5)
 * centers changed by more than 0.04 according to euclid_dist_2() evaluated on
 * the first min(numCoords, 2) coordinates, or once the loop counter reaches
 * 10000 (at most 10001 passes).
 * NOTE: `threshold` does not take part in the termination test; it is only
 * echoed in the debug output.
 *
 * The reductions use clusters[0] as a buffer of numClusters*numCoords floats,
 * so the rows of `clusters` must be stored contiguously starting at
 * clusters[0].
 *
 * Returns 1 unconditionally.
 */
int mpi_kmeans(float    **objects,      /* in: [numObjs][numCoords] */
               int        numCoords,
               int        numObjs,
               int        numClusters,
               float      threshold,    /* % objects change membership */
               int       *membership,   /* out: [numObjs] membership of points
                                                with parent cluster */
               float    **clusters,     /* out: [numClusters][numCoords] */
               MPI_Comm   comm)         /* MPI communicator */
{
    /* The convergence test looks at most at this many leading clusters and
       coordinates of each center. */
    enum { CHECK_CLUSTERS = 5, CHECK_COORDS = 2 };
    const double moveTolerance = 0.04;
    const int    maxLoops      = 10000;

    int      i, j, k, index, loop = 0, total_numObjs, done = 1;
    int      rank = 0;           /* only queried (and used) when _debug is set */
    int     *newClusterSize;     /* [numClusters]: no. objects assigned in each
                                                   new cluster */
    int     *clusterSize;        /* [numClusters]: temp buffer for Allreduce */
    float    no_of_changes = 0.0;/* fraction of objects that changed cluster */
    float    no_of_changes_tmp;
    float  **newClusters;        /* [numClusters][numCoords] */
    extern int _debug;

    /* Snapshot of the inspected part of the previous centers. A fixed-size
       stack buffer replaces the per-iteration heap allocation, whose rows
       were never freed. */
    float    prevCenters[CHECK_CLUSTERS][CHECK_COORDS];

    /* Clamp the inspected window to the real dimensions so that fewer than
       5 clusters or fewer than 2 coordinates cannot index out of bounds. */
    const int checkClusters = (numClusters < CHECK_CLUSTERS) ? numClusters
                                                             : CHECK_CLUSTERS;
    const int checkCoords   = (numCoords < CHECK_COORDS) ? numCoords
                                                         : CHECK_COORDS;

    if (_debug) MPI_Comm_rank(comm, &rank);

    /* initialize membership[] */
    for (i=0; i<numObjs; i++) membership[i] = -1;

    /* need to initialize newClusterSize and newClusters[0] to all 0 */
    newClusterSize = (int*) calloc(numClusters, sizeof(int));
    assert(newClusterSize != NULL);
    clusterSize    = (int*) calloc(numClusters, sizeof(int));
    assert(clusterSize != NULL);

    /* one contiguous block, with row pointers into it */
    newClusters    = (float**) malloc(numClusters * sizeof(float*));
    assert(newClusters != NULL);
    newClusters[0] = (float*)  calloc(numClusters * numCoords, sizeof(float));
    assert(newClusters[0] != NULL);
    for (i=1; i<numClusters; i++)
        newClusters[i] = newClusters[i-1] + numCoords;

    MPI_Allreduce(&numObjs, &total_numObjs, 1, MPI_INT, MPI_SUM, comm);
    if (_debug)
        printf("%2d: numObjs=%d total_numObjs=%d numClusters=%d numCoords=%d\n",rank,numObjs,total_numObjs,numClusters,numCoords);

    do {
        double curT = MPI_Wtime();

        done = 1;
        no_of_changes = 0.0;

        for (i=0; i<numObjs; i++) {
            /* find the array index of nearest cluster center */
            index = find_nearest_cluster(numClusters, numCoords, objects[i],
                                         clusters);

            /* if membership changes, increase no_of_changes by 1 */
            if (membership[i] != index) no_of_changes += 1.0;

            /* assign the membership to object i */
            membership[i] = index;

            /* update new cluster centers : sum of objects located within */
            newClusterSize[index]++;
            for (j=0; j<numCoords; j++)
                newClusters[index][j] += objects[i][j];
        }

        /* remember the inspected part of the current centers before the
           reduction below overwrites them */
        for (i=0; i<checkClusters; i++)
            for (j=0; j<checkCoords; j++)
                prevCenters[i][j] = clusters[i][j];

        /* sum all data objects in newClusters */
        MPI_Allreduce(newClusters[0], clusters[0], numClusters*numCoords,
                      MPI_FLOAT, MPI_SUM, comm);
        MPI_Allreduce(newClusterSize, clusterSize, numClusters, MPI_INT,
                      MPI_SUM, comm);

        /* average the sum and replace old cluster centers with newClusters */
        for (i=0; i<numClusters; i++) {
            for (j=0; j<numCoords; j++) {
                /* a count of 1 needs no division; a count of 0 leaves the
                   reduced sum in place */
                if (clusterSize[i] > 1)
                    clusters[i][j] /= clusterSize[i];
                newClusters[i][j] = 0.0;   /* set back to 0 */
            }
            newClusterSize[i] = 0;   /* set back to 0 */
        }

        /* another pass is needed as soon as one inspected center moved by
           more than the tolerance */
        for (k=0; k<checkClusters && done == 1; k++) {
            float moved = euclid_dist_2(checkCoords, prevCenters[k],
                                        clusters[k]);
            if (moved > moveTolerance)
                done = 0;
        }

        MPI_Allreduce(&no_of_changes, &no_of_changes_tmp, 1, MPI_FLOAT,
                      MPI_SUM, comm);
        no_of_changes = no_of_changes_tmp / total_numObjs;

        if (_debug) {
            double maxTime;
            curT = MPI_Wtime() - curT;
            MPI_Reduce(&curT, &maxTime, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
            /* report the slowest rank's time, i.e. the value just reduced */
            if (rank == 0)
                printf("%2d: loop=%d time=%f sec\n",rank,loop,maxTime);
        }
    } while ((done == 0) && (loop++ < maxLoops));

    if (_debug && rank == 0)
        printf("%2d: no_of_changes=%f threshold=%f loop=%d\n",rank,no_of_changes,threshold,loop);

    free(newClusters[0]);
    free(newClusters);
    free(newClusterSize);
    free(clusterSize);

    return 1;
}