/** hist_map()
 *  Map task for the histogram: tallies how often each 8-bit blue, green and
 *  red value occurs in the pixels assigned to this task, then emits one
 *  intermediate pair per value that occurred at least once.
 *
 *  args->data is read as consecutive 3-byte pixels (blue, green, red) and
 *  args->length is used as the number of such pixels.
 */
void hist_map(map_args_t *args)
{
    short *bucket_key;
    int red_counts[256];
    int green_counts[256];
    int blue_counts[256];

    assert(args);
    unsigned char *pixels = (unsigned char *)args->data;
    assert(pixels);

    memset(red_counts, 0, sizeof(red_counts));
    memset(green_counts, 0, sizeof(green_counts));
    memset(blue_counts, 0, sizeof(blue_counts));

    /* One pixel per iteration: byte 0 is blue, byte 1 green, byte 2 red. */
    for (int off = 0; off < (args->length) * 3; off += 3) {
        blue_counts[pixels[off]]++;
        green_counts[pixels[off + 1]]++;
        red_counts[pixels[off + 2]]++;
    }

    /* The count itself travels in the value pointer; only non-empty
     * buckets are emitted. */
    for (int level = 0; level < 256; level++) {
        if (blue_counts[level] > 0) {
            bucket_key = &(blue_keys[level]);
            emit_intermediate((void *)bucket_key, (void *)blue_counts[level], sizeof(short));
        }

        if (green_counts[level] > 0) {
            bucket_key = &(green_keys[level]);
            emit_intermediate((void *)bucket_key, (void *)green_counts[level], sizeof(short));
        }

        if (red_counts[level] > 0) {
            bucket_key = &(red_keys[level]);
            emit_intermediate((void *)bucket_key, (void *)red_counts[level], sizeof(short));
        }
    }
}
/** find_clusters()
 *  For each of the `size` points, finds the mean with the smallest squared
 *  distance (as reported by get_sq_dist()), records its index in
 *  clusters[], sets the global `modified` flag when an assignment changed,
 *  and emits the point keyed by the chosen mean's key.
 *
 *  Ties keep the lowest-indexed mean because only a strictly smaller
 *  distance replaces the current best.
 */
void find_clusters(int *points, keyval_t *means, int *clusters, int size)
{
    for (int p = 0; p < size; p++) {
        /* Points are stored back to back, `dim` ints each. */
        int *point = &points[p * dim];

        int nearest = 0;
        unsigned int nearest_dist = get_sq_dist(point, (int *)(means[0].val));

        for (int m = 1; m < num_means; m++) {
            unsigned int dist = get_sq_dist(point, (int *)(means[m].val));

            if (dist < nearest_dist) {
                nearest_dist = dist;
                nearest = m;
            }
        }

        if (clusters[p] != nearest) {
            clusters[p] = nearest;
            modified = true;
        }

        emit_intermediate(means[nearest].key, (void *)point,
                          sizeof(means[nearest].key));
    }
}
/** string_match_map()
 *  Map function: splits the chunk into words delimited by '\r' / '\n',
 *  hashes each word with compute_hashes(), and emits the word once for
 *  every reference hash (key1_final .. key4_final) that compares equal.
 *
 *  The chunk is modified in place: the byte following each word is
 *  overwritten with '\0' so the word can be used as a C string.
 */
void string_match_map(map_args_t *args)
{
    assert(args);

    size_t key_len, total_len = 0;
    char *key_file = args->data;
    char *cur_word;
    char cur_word_final[MAX_REC_LEN];

    while (total_len < args->length) {
        /* Skip separators. The bound is tested before the dereference so
         * the byte at offset args->length is never read. */
        for (; total_len < args->length &&
               (*key_file == '\0' || *key_file == '\r' || *key_file == '\n');
             key_file += 1, total_len += 1)
            ;

        if (total_len == args->length)
            break;

        /* Walk to the end of the word, again checking the bound first. */
        for (cur_word = key_file, key_len = 0;
             total_len < args->length &&
             *key_file != '\r' && *key_file != '\n';
             key_file += 1, total_len += 1, key_len += 1)
            ;

        /* NOTE(review): when the word runs to the very end of the chunk
         * this writes data[args->length]; presumably the caller guarantees
         * that byte is writable - confirm. */
        *key_file = 0;

        CHECK_ERROR(key_len <= 0);
        CHECK_ERROR(key_len > MAX_REC_LEN);

        memset(cur_word_final, 0, MAX_REC_LEN);
        compute_hashes(cur_word, cur_word_final, key_len);

        /* A word matching several reference hashes is emitted once per
         * match. */
        if (!strcmp(key1_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key2_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key3_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key4_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
    }
}
/**
 * partition_emit()
 * Partitioned emit_intermediate: emits the record under the partition id
 * that hsh_prt_fptr() computes for it.
 *
 * The key is a freshly heap-allocated int holding the partition id. It is
 * not freed here; presumably ownership passes to whoever consumes the
 * intermediate pair (confirm against the reduce side).
 *
 * Aborts the process if the key cannot be allocated, rather than
 * dereferencing a null pointer.
 */
void partition_emit(record_t *record)
{
    int *prt_id = malloc(sizeof *prt_id);

    if (prt_id == NULL) {
        abort();
    }

    *prt_id = hsh_prt_fptr(record);
    emit_intermediate(prt_id, (void *)record, (int)sizeof(int));
}
/** wordcount_map()
 *  Go through the allocated portion of the file and count the words.
 *
 *  A word starts at a letter and continues through letters and
 *  apostrophes. Each word is upper-cased in place, terminated in place by
 *  overwriting the character that follows it with '\0', and emitted with
 *  the value 1. The emitted key length includes the terminator.
 */
void wordcount_map(map_args_t *args)
{
    char *curr_start, curr_ltr;
    int state = NOT_IN_WORD;
    int i;

    assert(args);
    char *data = (char *)args->data;
    assert(data);

    curr_start = data;

    for (i = 0; i < args->length; i++) {
        /* toupper() is only defined for EOF and unsigned char values, so
         * a possibly negative plain char must be converted first. */
        curr_ltr = toupper((unsigned char)data[i]);

        switch (state) {
        case IN_WORD:
            data[i] = curr_ltr;
            if ((curr_ltr < 'A' || curr_ltr > 'Z') && curr_ltr != '\'') {
                /* First non-word character: terminate and emit. */
                data[i] = 0;
                emit_intermediate(curr_start, (void *)1,
                                  &data[i] - curr_start + 1);
                state = NOT_IN_WORD;
            }
            break;

        default:
        case NOT_IN_WORD:
            if (curr_ltr >= 'A' && curr_ltr <= 'Z') {
                curr_start = &data[i];
                data[i] = curr_ltr;
                state = IN_WORD;
            }
            break;
        }
    }

    /* Add the last word.
     * NOTE(review): this writes data[args->length], one byte past the
     * range the loop covered; presumably the caller guarantees that byte
     * is writable - confirm. */
    if (state == IN_WORD) {
        data[args->length] = 0;
        emit_intermediate(curr_start, (void *)1, &data[i] - curr_start + 1);
    }
}
/** mr_sort_map()
 *  Sorts the args->length records of this task in place with qsort(),
 *  using compare_g and a record width of unit_size, then emits every
 *  record in sorted order as a key with a null value.
 */
static void mr_sort_map(map_args_t *args)
{
    assert(args);

    void *data = (void *)args->data;
    assert(data);

    qsort(data, args->length, unit_size, compare_g);

    char *records = (char *)data;
    for (int rec = 0; rec < args->length; rec++) {
        emit_intermediate(records + (rec * unit_size), (void *)0, unit_size);
    }
}
/** pca_cov_map()
 *  Map task for computing the covariance matrix.
 *
 *  For every (start_row, cov_row) pair listed in the task's cov_locs
 *  array, computes the covariance of the two matrix rows and emits it
 *  keyed by a freshly allocated pca_cov_loc_t naming the pair. The
 *  covariance travels in the value pointer itself.
 *
 *  Frees cov_data->cov_locs and cov_data before returning. The emitted
 *  keys are not freed here.
 */
void pca_cov_map(map_args_t *args)
{
    assert(args);
    assert(args->length == 1);

    int i, j;
    int *start_row, *cov_row;
    int start_idx, cov_idx;
    keyval_t *mean;
    int sum;
    intptr_t covariance;
    intptr_t m1, m2;
    pca_cov_data_t *cov_data = (pca_cov_data_t *)args->data;
    pca_cov_loc_t *cov_loc;

    mean = cov_data->mean;

    /* compute the covariance for the allocated region */
    for (i = 0; i < cov_data->size; i++) {
        start_idx = cov_data->cov_locs[i].start_row;
        cov_idx = cov_data->cov_locs[i].cov_row;
        assert(cov_idx >= start_idx);

        start_row = &cov_data->matrix[start_idx * num_cols];
        cov_row = &cov_data->matrix[cov_idx * num_cols];
        sum = 0;

        /* The row means are carried directly in the val pointers. */
        m1 = (intptr_t)mean[start_idx].val;
        m2 = (intptr_t)mean[cov_idx].val;

        /* A row holds num_cols elements (the row stride used above), so
         * the products are accumulated over num_cols. Iterating num_rows
         * here would read outside the row whenever the matrix is not
         * square. */
        for (j = 0; j < num_cols; j++) {
            sum += (start_row[j] - m1) * (cov_row[j] - m2);
        }

        /* Sample covariance over the num_cols observations of each row. */
        covariance = sum / (num_cols - 1);

        CHECK_ERROR((cov_loc = (pca_cov_loc_t *)malloc(sizeof(pca_cov_loc_t))) == NULL);
        cov_loc->start_row = cov_data->cov_locs[i].start_row;
        cov_loc->cov_row = cov_data->cov_locs[i].cov_row;

        emit_intermediate((void *)cov_loc, (void *)covariance,
                          sizeof(pca_cov_loc_t));
    }

    free(cov_data->cov_locs);
    free(cov_data);
}
/** pca_mean_map()
 *  Map task to compute the mean of each row assigned to this task.
 *
 *  For each of the args->length rows, sums its num_cols elements, divides
 *  by num_cols (integer division) and emits the result keyed by the
 *  address of the row's first element. The mean travels in the value
 *  pointer itself. Frees the task descriptor before returning.
 */
void pca_mean_map(map_args_t *args)
{
    pca_map_data_t *data = (pca_map_data_t *)args->data;
    int *matrix = data->matrix;

    for (int r = 0; r < args->length; r++) {
        int *row = &matrix[r * num_cols];
        int sum = 0;

        for (int c = 0; c < num_cols; c++) {
            sum += row[c];
        }

        intptr_t mean = sum / num_cols;
        emit_intermediate((void *)row, (void *)mean, sizeof(int *));
    }

    free(data);
}
/** map()
 *  Upper-cases the chunk in place, then emits every word found in it with
 *  a count of 1. A word starts at a letter 'A'..'Z' and continues through
 *  letters and apostrophes. The character following each word is
 *  overwritten with 0 so the word can be used as a C string.
 */
void map(data_type const& s, map_container& out) const
{
    for (uint64_t i = 0; i < s.len; i++)
    {
        // toupper() is only defined for EOF and unsigned char values, so
        // a possibly negative char must be converted first.
        s.data[i] = toupper(static_cast<unsigned char>(s.data[i]));
    }

    uint64_t i = 0;
    while (i < s.len)
    {
        // Skip everything that cannot start a word.
        while (i < s.len && (s.data[i] < 'A' || s.data[i] > 'Z'))
            i++;

        uint64_t start = i;

        // Consume the word body.
        while (i < s.len &&
               ((s.data[i] >= 'A' && s.data[i] <= 'Z') || s.data[i] == '\''))
            i++;

        if (i > start)
        {
            // NOTE(review): when the word ends exactly at s.len this
            // writes s.data[s.len]; presumably the caller guarantees that
            // byte is writable - confirm.
            s.data[i] = 0;
            wc_word word = { s.data + start };
            emit_intermediate(out, word, 1);
        }
    }
}
/**
 * key_emit()
 * Emits one intermediate pair for the record, taking the key, the value
 * and the key size from the key_fptr, aggr_ptr_fptr and ksize_fptr
 * callbacks respectively.
 */
void key_emit(record_t *record)
{
    emit_intermediate(key_fptr(record),
                      aggr_ptr_fptr(record),
                      ksize_fptr(record));
}