Пример #1
0
/** hist_map()
 * Map task: tallies how often each 8-bit intensity occurs in the blue,
 * green and red channels of the pixels handed to this task, then emits
 * one intermediate pair per intensity that occurred at least once.
 *
 * args->data is read as consecutive 3-byte pixels in blue, green, red
 * order, and args->length is the number of such pixels. Each emitted key
 * is the address of the matching blue_keys / green_keys / red_keys slot;
 * the count is carried in the value pointer itself.
 */
void hist_map(map_args_t *args) 
{
   int blue_hist[256]  = {0};
   int green_hist[256] = {0};
   int red_hist[256]   = {0};

   assert(args);
   unsigned char *pixels = (unsigned char *)args->data;
   assert(pixels);

   /* One step per pixel: byte 0 is blue, byte 1 is green, byte 2 is red. */
   for (int off = 0; off < (args->length) * 3; off += 3)
   {
      const unsigned char *px = &pixels[off];

      blue_hist[px[0]]++;
      green_hist[px[1]]++;
      red_hist[px[2]]++;
   }

   /* Only intensities that actually occurred are emitted. */
   for (int level = 0; level < 256; level++)
   {
      if (blue_hist[level] > 0)
      {
         emit_intermediate((void *)&(blue_keys[level]),
                           (void *)blue_hist[level],
                           sizeof(short));
      }
      if (green_hist[level] > 0)
      {
         emit_intermediate((void *)&(green_keys[level]),
                           (void *)green_hist[level],
                           sizeof(short));
      }
      if (red_hist[level] > 0)
      {
         emit_intermediate((void *)&(red_keys[level]),
                           (void *)red_hist[level],
                           sizeof(short));
      }
   }
}
Пример #2
0
/** find_clusters()
 *  For each of the `size` points, picks the mean with the smallest
 *  get_sq_dist() value (the lowest index wins ties), records that index in
 *  clusters[], and emits the point under the chosen mean's key.
 *
 *  Points are laid out back to back, `dim` ints each. The global
 *  `modified` flag is set whenever a point's cluster index changes.
 */
void find_clusters(int *points, keyval_t *means, int *clusters, int size) 
{
    int p, m;

    for (p = 0; p < size; p++)
    {
        int *point = &points[p * dim];
        unsigned int best_dist = get_sq_dist(point, (int *)(means[0].val));
        int best = 0;

        /* Mean 0 is the starting candidate; compare against the rest. */
        for (m = 1; m < num_means; m++)
        {
            unsigned int dist = get_sq_dist(point, (int *)(means[m].val));

            if (dist < best_dist)
            {
                best_dist = dist;
                best = m;
            }
        }

        if (clusters[p] != best)
        {
            clusters[p] = best;
            modified = true;
        }

        emit_intermediate(means[best].key, (void *)point, sizeof(means[best].key));
    }
}
Пример #3
0
/** string_match_map()
 *  Map function: splits the chunk in args->data into words delimited by
 *  '\r' or '\n', runs compute_hashes() on each word, and emits the word
 *  once for each of key1_final..key4_final that compares equal to the
 *  result.
 *
 *  The chunk is modified in place: the byte following each word is
 *  overwritten with a NUL. Emitted pairs use the word's address as the key,
 *  the constant 1 as the value, and the word length as the key size.
 */
void string_match_map(map_args_t *args)
{
    assert(args);

    size_t key_len, total_len = 0;
    char *key_file = args->data;
    char *cur_word;
    char cur_word_final[MAX_REC_LEN];

    while (total_len < args->length) {
        /* Skip separators. The bounds test comes first so the byte at
         * offset args->length is never read. */
        while (total_len < args->length &&
               (*key_file == '\0' || *key_file == '\r' || *key_file == '\n')) {
            key_file += 1;
            total_len += 1;
        }

        if (total_len == args->length) break;

        /* Consume one word; again the bounds test precedes the dereference. */
        cur_word = key_file;
        key_len = 0;
        while (total_len < args->length &&
               *key_file != '\r' && *key_file != '\n') {
            key_file += 1;
            total_len += 1;
            key_len += 1;
        }

        /* NOTE(review): when the word runs to the end of the chunk this
         * store lands at offset args->length; it is only valid if the
         * buffer has at least one byte of slack -- confirm with the caller. */
        *key_file = 0;
        CHECK_ERROR(key_len <= 0);
        /* NOTE(review): a word of exactly MAX_REC_LEN bytes passes this
         * check; if compute_hashes() fills key_len bytes, cur_word_final
         * would have no terminator for strcmp() -- confirm. */
        CHECK_ERROR(key_len > MAX_REC_LEN);

        memset(cur_word_final, 0, MAX_REC_LEN);
        compute_hashes(cur_word, cur_word_final, key_len);

        if (!strcmp(key1_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key2_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key3_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key4_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
    }
}
Пример #4
0
/**
 * Partitioned emit_intermediate: emits `record` under the partition id
 * returned by hsh_prt_fptr(record).
 *
 * The id is stored in a freshly malloc'ed int that is passed to
 * emit_intermediate() as the key; it is not freed here. If the allocation
 * fails the process is aborted.
 */
void partition_emit(record_t *record){

	int *prt_id = malloc(sizeof *prt_id);

	/* Stop with a defined failure instead of writing through NULL below. */
	if (prt_id == NULL) {
		abort();
	}

	*prt_id = hsh_prt_fptr(record);

	emit_intermediate(prt_id, (void *)record, (int)sizeof(int));

}
Пример #5
0
/** wordcount_map()
 * Scans the args->length bytes at args->data and emits every word found.
 *
 * A word starts at a letter and continues through letters and apostrophes
 * (compared after upper-casing). Word bytes are upper-cased in place and
 * the byte that ends the word is overwritten with NUL. Each word is emitted
 * with its start address as the key, the constant 1 as the value, and a key
 * size that includes the terminating NUL.
 */
void wordcount_map(map_args_t *args) {
	char *curr_start;
	char curr_ltr;
	int state = NOT_IN_WORD;
	int i;

	assert(args);

	char *data = (char *)args->data;

	assert(data);
	curr_start = data;

	for (i = 0; i < args->length; i++) {
		/* toupper() is only defined for EOF and unsigned char values, so
		 * a possibly negative plain char must be converted first. */
		curr_ltr = toupper((unsigned char)data[i]);
		switch (state) {
		case IN_WORD:
			data[i] = curr_ltr;
			if ((curr_ltr < 'A' || curr_ltr > 'Z') && curr_ltr != '\'') {
				/* End of word: terminate it in place and emit. */
				data[i] = 0;
				emit_intermediate(curr_start, (void *)1, &data[i] - curr_start + 1);
				state = NOT_IN_WORD;
			}
			break;

		default:
		case NOT_IN_WORD:
			if (curr_ltr >= 'A' && curr_ltr <= 'Z') {
				curr_start = &data[i];
				data[i] = curr_ltr;
				state = IN_WORD;
			}
			break;
		}
	}

	// Add the last word
	if (state == IN_WORD) {
		/* NOTE(review): this store is at offset args->length, one past the
		 * bytes scanned above; it is only valid if the buffer has at least
		 * one byte of slack -- confirm with the caller. */
		data[args->length] = 0;
		emit_intermediate(curr_start, (void *)1, &data[i] - curr_start + 1);
	}
}
Пример #6
0
/** mr_sort_map()
 *  Sorts the task's args->length records (unit_size bytes each) in place
 *  with qsort() and compare_g, then emits every record in sorted order.
 *  Each record's address is the key, the value is a null pointer, and the
 *  key size is unit_size.
 */
static void mr_sort_map(map_args_t *args) 
{
   assert(args);

   char *records = (char *)args->data;
   int idx = 0;

   assert(records);

   qsort(records, args->length, unit_size, compare_g);

   while (idx < args->length)
   {
      emit_intermediate(records + (idx * unit_size), (void *)0, unit_size);
      idx++;
   }
}
Пример #7
0
/** pca_cov_map()
 *  Map task for computing the covariance matrix.
 *
 *  args->data is a pca_cov_data_t and args->length must be 1. For each of
 *  the cov_data->size (start_row, cov_row) pairs in cov_data->cov_locs, the
 *  covariance of those two matrix rows is computed in integer arithmetic
 *  and emitted. The key is a heap-allocated copy of the pair (not freed
 *  here); the covariance is carried in the value pointer itself.
 *
 *  Row means are read from the value pointers of cov_data->mean.
 *  cov_data->cov_locs and cov_data are freed before returning.
 */
void pca_cov_map(map_args_t *args)
{
    assert(args);
    assert(args->length == 1);
    /* The covariance divisor below is num_cols - 1. */
    assert(num_cols > 1);
    int i, j;
    int *start_row, *cov_row;
    int start_idx, cov_idx;
    keyval_t *mean;
    int sum;
    intptr_t covariance;
    intptr_t m1, m2;
    
    pca_cov_data_t *cov_data = (pca_cov_data_t *)args->data;
    mean = cov_data->mean;
    pca_cov_loc_t *cov_loc;
    
    /* compute the covariance for the allocated region */
    for (i = 0; i < cov_data->size; i++) 
    {
        start_idx = cov_data->cov_locs[i].start_row;
        cov_idx = cov_data->cov_locs[i].cov_row;
        assert(cov_idx >= start_idx);
        start_row = &cov_data->matrix[start_idx * num_cols];
        cov_row = &cov_data->matrix[cov_idx * num_cols];
        sum = 0;
        m1 = (intptr_t)mean[start_idx].val;
        m2 = (intptr_t)mean[cov_idx].val;
        /* Rows are num_cols elements wide (see the addressing just above),
         * so the walk and the sample count both use num_cols; iterating to
         * num_rows would step outside the row for non-square matrices. */
        for (j = 0; j < num_cols; j++)
        {
            sum += (start_row[j] - m1) * (cov_row[j] - m2);
        }
        
        covariance = sum / (num_cols - 1);
        
        CHECK_ERROR((cov_loc = malloc(sizeof(*cov_loc))) == NULL);
        cov_loc->start_row = cov_data->cov_locs[i].start_row;
        cov_loc->cov_row = cov_data->cov_locs[i].cov_row;
        emit_intermediate((void *)cov_loc, (void *)covariance, sizeof(pca_cov_loc_t));
    }
    
    free(cov_data->cov_locs);
    free(cov_data);
}
Пример #8
0
/** pca_mean_map()
 *  Map task to compute the mean of each row assigned to it.
 *
 *  args->data is a pca_map_data_t whose matrix holds args->length rows of
 *  num_cols ints. For each row the integer mean (sum / num_cols) is emitted
 *  with the row's address as the key and the mean carried in the value
 *  pointer itself. The pca_map_data_t is freed before returning.
 */
void pca_mean_map(map_args_t *args)
{
    pca_map_data_t *task = (pca_map_data_t *)args->data;
    int *matrix = task->matrix;
    int r;

    for (r = 0; r < args->length; r++)
    {
        int *row = &matrix[r * num_cols];
        int total = 0;
        int c = 0;
        intptr_t avg;

        while (c < num_cols)
        {
            total += row[c];
            c++;
        }

        avg = total / num_cols;
        emit_intermediate((void *)row, (void *)avg, sizeof(int *));
    }

    free(task);
}
Пример #9
0
    /**
     * Map step: upper-cases the s.len bytes at s.data in place, then emits
     * (word, 1) for every word in the chunk.
     *
     * A word starts at a letter A-Z and continues through letters and
     * apostrophes. The byte that follows each word is overwritten with 0 so
     * that the emitted wc_word, which holds only the start pointer, refers
     * to a terminated string.
     */
    void map(data_type const& s, map_container& out) const
    {
        for (uint64_t i = 0; i < s.len; i++)
        {
            // toupper() is only defined for EOF and unsigned char values,
            // so a possibly negative plain char must be converted first.
            s.data[i] = toupper(static_cast<unsigned char>(s.data[i]));
        }

        uint64_t i = 0;
        while(i < s.len)
        {            
            // Skip everything that cannot start a word.
            while(i < s.len && (s.data[i] < 'A' || s.data[i] > 'Z'))
                i++;
            uint64_t start = i;
            while(i < s.len && ((s.data[i] >= 'A' && s.data[i] <= 'Z') || s.data[i] == '\''))
                i++;
            if(i > start)
            {
                // NOTE(review): when the word runs to the end of the chunk
                // this store is at index s.len; it is only valid if the
                // buffer has at least one byte of slack -- confirm.
                s.data[i] = 0;
                wc_word word = { s.data+start };
                emit_intermediate(out, word, 1);
            }
        }
    }
Пример #10
0
/**
 *  Key_emit: emits one intermediate pair for `record`.
 *
 *  The three arguments handed to emit_intermediate() are the results of
 *  calling key_fptr, aggr_ptr_fptr and ksize_fptr on the record, in that
 *  argument order.
 *  NOTE(review): presumably these yield the key, the value to aggregate and
 *  the key size respectively -- confirm against their definitions.
 */
void key_emit(record_t *record){

	//char *temp = (char *)key_fptr(record);
	emit_intermediate( key_fptr(record), aggr_ptr_fptr(record), ksize_fptr(record) );

}