Ejemplo n.º 1
0
/** calc_means()
 *  Recompute the mean of every cluster from the points assigned to it.
 *
 *  points   - per-point coordinate vectors; rows 0..num_points-1 are read
 *  means    - per-cluster coordinate vectors; row i is overwritten with
 *             sum[j] / grp_size (integer division) for each dimension j
 *  clusters - clusters[j] is the cluster index assigned to point j
 *
 *  A cluster with no assigned points keeps its previous mean.
 *  Uses the externally defined dim, num_means and num_points, and relies
 *  on add_to_sum() to accumulate a point into the scratch vector.
 *  The function has no error channel, so it aborts if the scratch buffer
 *  cannot be allocated.
 */
void calc_means(int **points, int **means, int *clusters)
{
    int i, j, grp_size;
    int *sum;
    
    sum = (int *)malloc(dim * sizeof(int));
    if (sum == NULL)
    {
        /* Explicit out-of-memory policy instead of a NULL dereference below. */
        abort();
    }
    
    for (i = 0; i < num_means; i++) 
    {
        /* Start every cluster from a zeroed accumulator. */
        memset(sum, 0, dim * sizeof(int));
        grp_size = 0;
        
        for (j = 0; j < num_points; j++)
        {
            if (clusters[j] == i) 
            {
                add_to_sum(sum, points[j]);
                grp_size++;
            }    
        }
        
        /* An empty cluster would divide by zero; leave its mean untouched. */
        if (grp_size != 0)
        {
            for (j = 0; j < dim; j++)
            {
                means[i][j] = sum[j] / grp_size;
            }
        }
    }
    
    /* The scratch buffer is private to this call; release it. */
    free(sum);
}
Ejemplo n.º 2
0
/** kmeans_reduce()
 *  Average all value vectors supplied for one key and emit the result.
 *
 *  key_in - key passed through unchanged to emit(); must be non-NULL
 *  itr    - iterator over the values for this key; must be non-NULL
 *
 *  Every value produced by the iterator is accumulated with add_to_sum(),
 *  then each of the dim components is divided (integer division) by the
 *  number of values reported by iter_size().
 *
 *  The emitted mean vector is heap-allocated and is not freed here;
 *  presumably the consumer of emit() takes ownership -- TODO confirm.
 *  If the iterator holds no values, nothing is emitted.
 *  The function has no error channel, so it aborts on allocation failure.
 */
void kmeans_reduce(void *key_in, iterator_t *itr)
{
    assert (key_in);
    assert (itr);
    
    int i;
    int *sum;
    int *mean;
    void *val;
    int vals_len = iter_size (itr);
    
    sum = (int *)calloc(dim, sizeof(int));
    mean = (int *)malloc(dim * sizeof(int));
    if (sum == NULL || mean == NULL)
    {
        /* Explicit out-of-memory policy instead of a NULL dereference below. */
        abort();
    }
    
    i = 0;
    while (iter_next (itr, &val))
    {
        add_to_sum (sum, val);
        ++i;
    }
    /* The iterator must yield exactly as many values as it reported. */
    assert (i == vals_len);
    
    if (vals_len == 0)
    {
        /* No values: dividing by vals_len would be a division by zero. */
        free(sum);
        free(mean);
        return;
    }
    
    for (i = 0; i < dim; i++)
    {
        mean[i] = sum[i] / vals_len;
    }
    
    free(sum);
    emit(key_in, (void *)mean);
}
Ejemplo n.º 3
0
/** calc_means()
 *  Thread-style worker (void * in, void * out): recompute the means of
 *  the clusters in the range [start_idx, start_idx + num_pts) described
 *  by the thread_arg passed in arg.
 *
 *  For each cluster in the range, every one of the num_points points is
 *  scanned; those whose clusters[] entry equals the cluster index are
 *  accumulated into the scratch vector t_arg->sum via add_to_sum(). Each
 *  component of the cluster's mean is then set to sum / member count
 *  (integer division). A cluster with no members keeps its previous mean.
 *
 *  Always returns a null pointer.
 */
void *calc_means(void *arg)
{
   thread_arg *work = (thread_arg *)arg;
   int dim = work->dim;
   int num_points = work->num_points;
   int **points = work->points;
   int **means = work->means;
   int *clusters = work->clusters;
   int *acc = work->sum;
   int first = work->start_idx;
   int last = first + work->num_pts;
   int c, p, d, members;

   for (c = first; c < last; c++)
   {
      /* Reset the accumulator and member count for this cluster. */
      memset(acc, 0, dim * sizeof(int));
      members = 0;

      for (p = 0; p < num_points; p++)
      {
         if (clusters[p] != c)
         {
            continue;
         }
         add_to_sum(acc, points[p], dim);
         members++;
      }

      /* Nothing was assigned to this cluster: keep its previous mean. */
      if (members == 0)
      {
         continue;
      }

      for (d = 0; d < dim; d++)
      {
         means[c][d] = acc[d] / members;
      }
   }

   /* The scratch vector comes from the thread argument and is not freed here. */
   return (void *)0;
}