/** calc_means()
 *  Compute the means for the various clusters.
 *
 *  For every cluster index i in [0, num_means), accumulates (via
 *  add_to_sum) each points[j] whose clusters[j] == i, then stores the
 *  per-dimension integer quotient sum / group-size in means[i].
 *  A cluster with no assigned points keeps its previous mean.
 *
 *  points   - num_points coordinate vectors, each read through add_to_sum
 *  means    - num_means vectors of dim ints; updated in place
 *  clusters - cluster index assigned to each point (num_points entries)
 */
void calc_means(int **points, int **means, int *clusters)
{
    int i, j, grp_size;
    int *sum;

    sum = (int *)malloc(dim * sizeof(int));
    if (sum == NULL && dim > 0) {
        /* Without the scratch buffer no mean can be computed; stop
         * explicitly instead of writing through a null pointer below. */
        abort();
    }

    for (i = 0; i < num_means; i++) {
        memset(sum, 0, dim * sizeof(int));
        grp_size = 0;

        for (j = 0; j < num_points; j++) {
            if (clusters[j] == i) {
                add_to_sum(sum, points[j]);
                grp_size++;
            }
        }

        /* An empty cluster has no average: leave means[i] untouched
         * (this also avoids dividing by zero). */
        if (grp_size == 0) {
            continue;
        }

        for (j = 0; j < dim; j++) {
            means[i][j] = sum[j] / grp_size;
        }
    }

    /* The scratch buffer is private to this call; release it so that
     * repeated invocations do not leak. */
    free(sum);
}
/** kmeans_reduce()
 *  Updates the sum calculation for the various points.
 *
 *  Accumulates every value delivered by the iterator (via add_to_sum),
 *  divides each of the dim components by the number of values, and
 *  emits the resulting mean vector under key_in.
 *
 *  key_in - key passed through unchanged to emit(); must be non-NULL
 *  itr    - iterator over the values for this key; must be non-NULL
 *
 *  If the iterator reports no values, nothing is emitted: there is no
 *  mean to compute and the division below would be by zero.
 *
 *  The emitted mean buffer is heap-allocated here and is not freed in
 *  this function; presumably the consumer of emit() takes ownership --
 *  TODO confirm against the emit() contract.
 */
void kmeans_reduce(void *key_in, iterator_t *itr)
{
    assert (key_in);
    assert (itr);

    int i;
    int *sum;
    int *mean;
    void *val;
    int vals_len = iter_size (itr);

    /* Guard the per-component division further down. */
    if (vals_len <= 0) {
        return;
    }

    sum = (int *)calloc(dim, sizeof(int));
    mean = (int *)malloc(dim * sizeof(int));
    if (dim > 0 && (sum == NULL || mean == NULL)) {
        /* Out of memory: stop explicitly rather than writing through
         * a null pointer. */
        abort();
    }

    i = 0;
    while (iter_next (itr, &val)) {
        add_to_sum (sum, val);
        ++i;
    }
    /* The iterator must deliver exactly as many values as it reported. */
    assert (i == vals_len);

    for (i = 0; i < dim; i++) {
        mean[i] = sum[i] / vals_len;
    }

    free(sum);
    emit(key_in, (void *)mean);
}
/** calc_means() * Compute the means for the various clusters */ void *calc_means(void *arg) { int i, j, grp_size; int *sum; thread_arg *t_arg = (thread_arg *)arg; int start_idx = t_arg->start_idx; int end_idx = start_idx + t_arg->num_pts; int dim = t_arg->dim; int num_points = t_arg->num_points; int ** means = t_arg->means; int * clusters = t_arg->clusters; int ** points = t_arg->points; sum = t_arg->sum; for (i = start_idx; i < end_idx; i++) { memset(sum, 0, dim * sizeof(int)); grp_size = 0; for (j = 0; j < num_points; j++) { if (clusters[j] == i) { add_to_sum(sum, points[j], dim); grp_size++; } } for (j = 0; j < dim; j++) { //dprintf("div sum = %d, grp size = %d\n", sum[j], grp_size); if (grp_size != 0) { means[i][j] = sum[j] / grp_size; } } } // free(sum); return (void *)0; }