int main(int argc, char **argv) { final_data_t kmeans_vals; map_reduce_args_t map_reduce_args; int i; int *means; bool first_run; struct timeval begin, end; #ifdef TIMING unsigned int library_time = 0; unsigned int inter_library_time = 0; #endif get_time (&begin); parse_args(argc, argv); // get points kmeans_data.points = (int *)malloc(sizeof(int) * num_points * dim); generate_points(kmeans_data.points, num_points); // get means kmeans_data.means = (keyval_t *)malloc(sizeof(keyval_t) * num_means); means = malloc(sizeof(int) * dim * num_means); for (i=0; i<num_means; i++) { kmeans_data.means[i].val = &means[i * dim]; kmeans_data.means[i].key = malloc(sizeof(void *)); } generate_means(kmeans_data.means, num_means); kmeans_data.next_point = 0; kmeans_data.unit_size = sizeof(int) * dim; kmeans_data.clusters = (int *)malloc(sizeof(int) * num_points); memset(kmeans_data.clusters, -1, sizeof(int) * num_points); modified = true; CHECK_ERROR (map_reduce_init ()); // Setup map reduce args memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &kmeans_data; map_reduce_args.map = kmeans_map; map_reduce_args.reduce = kmeans_reduce; map_reduce_args.splitter = kmeans_splitter; map_reduce_args.locator = kmeans_locator; map_reduce_args.key_cmp = mykeycmp; map_reduce_args.unit_size = kmeans_data.unit_size; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &kmeans_vals; map_reduce_args.data_size = (num_points + num_means) * dim * sizeof(int); map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 8; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; map_reduce_args.use_one_queue_per_task = true; printf("KMeans: 
Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif first_run = true; while (modified == true) { modified = false; kmeans_data.next_point = 0; //dprintf("."); get_time (&begin); CHECK_ERROR (map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING library_time += time_diff (&end, &begin); #endif get_time (&begin); for (i = 0; i < kmeans_vals.length; i++) { int mean_idx = *((int *)(kmeans_vals.data[i].key)); if (first_run == false) free(kmeans_data.means[mean_idx].val); kmeans_data.means[mean_idx] = kmeans_vals.data[i]; } if (kmeans_vals.length > 0) free(kmeans_vals.data); get_time (&end); #ifdef TIMING inter_library_time += time_diff (&end, &begin); #endif first_run = false; } #ifdef TIMING fprintf (stderr, "library: %u\n", library_time); fprintf (stderr, "inter library: %u\n", inter_library_time); #endif get_time (&begin); CHECK_ERROR (map_reduce_finalize ()); dprintf("\n"); printf("KMeans: MapReduce Completed\n"); dprintf("\n\nFinal means:\n"); dump_means(kmeans_data.means, num_means); free(kmeans_data.points); for (i = 0; i < num_means; i++) { free(kmeans_data.means[i].key); free(kmeans_data.means[i].val); } free (kmeans_data.means); free (means); free(kmeans_data.clusters); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
int main(int argc, char *argv[]) { final_data_t hist_vals; unsigned long long int i; int fd; char *fdata; struct stat finfo; char * fname; struct timeval begin, end; get_time (&begin); // Make sure a filename is specified if (argv[1] == NULL) { printf("USAGE: %s <bitmap filename>\n", argv[0]); exit(1); } fname = argv[1]; printf("Histogram: Running...\n"); // Read in the file CHECK_ERROR((fd = open(fname, O_RDONLY)) < 0); // Get the file info (for file length) CHECK_ERROR(fstat(fd, &finfo) < 0); #ifndef NO_MMAP // Memory map the file CHECK_ERROR((fdata = mmap(0, finfo.st_size + 1, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)) == NULL); #else int ret; fdata = (char *)malloc (finfo.st_size); CHECK_ERROR (fdata == NULL); ret = read (fd, fdata, finfo.st_size); CHECK_ERROR (ret != finfo.st_size); #endif if ((fdata[0] != 'B') || (fdata[1] != 'M')) { printf("File is not a valid bitmap file. Exiting\n"); exit(1); } test_endianess(); // will set the variable "swap" unsigned short *bitsperpixel = (unsigned short *)(&(fdata[BITS_PER_PIXEL_POS])); if (swap) { swap_bytes((char *)(bitsperpixel), sizeof(*bitsperpixel)); } if (*bitsperpixel != 24) { // ensure its 3 bytes per pixel printf("Error: Invalid bitmap format - "); printf("This application only accepts 24-bit pictures. Exiting\n"); exit(1); } unsigned short *data_pos = (unsigned short *)(&(fdata[IMG_DATA_OFFSET_POS])); if (swap) { swap_bytes((char *)(data_pos), sizeof(*data_pos)); } unsigned long long int imgdata_bytes = finfo.st_size - (*(data_pos)); printf("This file has %llu bytes of image data, %llu pixels\n", imgdata_bytes, imgdata_bytes / 3); // We use this global variable arrays to store the "key" for each histogram // bucket. 
This is to prevent memory leaks in the mapreduce scheduler for (i = 0; i < 256; i++) { blue_keys[i] = i; green_keys[i] = 1000 + i; red_keys[i] = 2000 + i; } CHECK_ERROR (map_reduce_init ()); // Setup map reduce args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &(fdata[*data_pos]); //&hist_data; map_reduce_args.map = hist_map; map_reduce_args.reduce = hist_reduce; map_reduce_args.combiner = hist_combiner; map_reduce_args.splitter = NULL; //hist_splitter; map_reduce_args.key_cmp = myshortcmp; map_reduce_args.unit_size = 3; // 3 bytes per pixel map_reduce_args.partition = NULL; // use default map_reduce_args.result = &hist_vals; map_reduce_args.data_size = imgdata_bytes; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; fprintf(stderr, "Histogram: Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR( map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR (map_reduce_finalize ()); short pix_val; intptr_t freq; short prev = 0; dprintf("\n\nBlue\n"); dprintf("----------\n\n"); for (i = 0; i < hist_vals.length; i++) { keyval_t * curr = &((keyval_t *)hist_vals.data)[i]; pix_val = *((short *)curr->key); freq = (intptr_t)curr->val; if (pix_val - prev > 700) { if (pix_val >= 2000) { dprintf("\n\nRed\n"); dprintf("----------\n\n"); } else if (pix_val >= 1000) { dprintf("\n\nGreen\n"); 
dprintf("----------\n\n"); } } dprintf("%hd - %" PRIdPTR "\n", pix_val % 1000, freq); prev = pix_val; } free(hist_vals.data); #ifndef NO_MMAP CHECK_ERROR (munmap (fdata, finfo.st_size + 1) < 0); #else free (fdata); #endif CHECK_ERROR (close (fd) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
/*
 * Partition operator: runs one MapReduce pass over the keyval array held in
 * `in`, using mapper_prt as the map function and no reducer, and stores the
 * outcome in `op_results`.
 *
 * in         - input; in->data is the task data, in->length the number of
 *              keyval_t records it holds.
 * hsh_prt    - record hashing callback; published through the file-level
 *              hsh_prt_fptr before the job starts.
 * op_results - receives the MapReduce result.
 * op_num     - selects the key comparator: integercmp when 1, nullcmp
 *              otherwise.
 *
 * Also sets the file-level num_reducers to MR_NUMTHREADS when that is
 * positive, else to get_nprocs(). The map_reduce call is timed with
 * `begin`/`end` and accumulated into `library_time`, none of which are
 * declared in this function.
 */
void partition_op(final_data_t *in, int (*hsh_prt)(void *record), final_data_t *op_results, int op_num)
{
    const size_t kv_bytes = sizeof(keyval_t);
    struct timeval op_start, op_stop;
    map_reduce_args_t mr_args;
    int threads_from_env;
    int online_procs;

    hsh_prt_fptr = hsh_prt;
    threads_from_env = atoi(GETENV("MR_NUMTHREADS"));
    online_procs = get_nprocs();

    printf("\npartition operator \n");
    printf("results size %d \n", in->length);
    printf("results * unitsize %d \n", (in->length) * (int)kv_bytes);
    get_time(&op_start);

    /* Describe the job: start from an all-zero argument block. */
    memset(&mr_args, 0, sizeof(map_reduce_args_t));

    /* Input and output. */
    mr_args.task_data = in->data;
    mr_args.map = mapper_prt;
    mr_args.reduce = NULL;
    mr_args.splitter = NULL;
    if (op_num == 1) {
        mr_args.key_cmp = integercmp;
    } else {
        mr_args.key_cmp = nullcmp;
    }
    mr_args.unit_size = kv_bytes;
    mr_args.partition = NULL;
    mr_args.result = op_results;
    mr_args.data_size = (in->length) * kv_bytes;

    /* Scheduler tuning, all taken from the environment. */
    mr_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));
    mr_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));
    mr_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));
    mr_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));
    mr_args.num_procs = atoi(GETENV("MR_NUMPROCS"));
    mr_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));
    mr_args.use_one_queue_per_task = (atoi(GETENV("MR_1QPERTASK")) != 0);

    /* Prefer the configured thread count; fall back to the processor count. */
    if (threads_from_env > 0) {
        num_reducers = threads_from_env;
    } else {
        num_reducers = online_procs;
    }
    printf("number of reducers: %d \n", num_reducers);

    get_time(&begin);
    CHECK_ERROR(map_reduce(&mr_args) < 0);
    get_time(&end);

#ifdef TIMING
    library_time += time_diff(&end, &begin);
    fprintf(stderr, "library: %u\n", library_time);
#endif

    get_time(&op_stop);

#ifdef TIMING
    fprintf(stderr, "Partition time: %u\n\n", time_diff(&op_stop, &op_start));
#endif
}
int main(int argc, char *argv[]) { final_data_t str_vals; struct timeval begin, end; struct timeval starttime,endtime; str_data_t str_data; get_time (&begin); CHECK_ERROR (map_reduce_init (&argc, &argv)); compute_hashes(key1, key1_final, strlen(key1)); compute_hashes(key2, key2_final, strlen(key2)); compute_hashes(key3, key3_final, strlen(key3)); compute_hashes(key4, key4_final, strlen(key4)); str_data.offset = 0; str_data.fname_keys = argv[1]; printf("String Match: Running...\n"); // Setup scheduler args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &str_data; map_reduce_args.task_data_size = sizeof(str_data_t); map_reduce_args.prep = sm_prep; map_reduce_args.cleanup = sm_cleanup; map_reduce_args.map = string_match_map; map_reduce_args.reduce = sm_reduce; map_reduce_args.splitter = string_match_splitter; map_reduce_args.key_cmp = mystrcmp; map_reduce_args.unit_size = DEFAULT_UNIT_SIZE; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &str_vals; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; printf("String Match: Calling String Match\n"); gettimeofday(&starttime,0); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR (map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); gettimeofday(&endtime,0); printf("\nString Match Results:\n"); int i; for (i = 0; i < str_vals.length; i++) { keyval_t * curr = &((keyval_t 
*)str_vals.data)[i]; dprintf("%15s - %" PRIdPTR "\n", (char *)curr->key, (intptr_t)curr->val); } get_time (&end); map_reduce_cleanup(&map_reduce_args); CHECK_ERROR (map_reduce_finalize ()); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }