int main(int argc, char *argv[]) { int i; wc_data_t wc_data; i = map_reduce_init (&argc, &argv); CHECK_ERROR(i < 0); wc_data.fname = argv[1]; printf("Wordcount: Running...\n"); // Setup splitter args wc_data.unit_size = 5; // approx 3 bytes per word wc_data.fpos = 0; // Setup map reduce args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &wc_data; map_reduce_args.task_data_size = sizeof(wc_data_t); map_reduce_args.prep = wc_prep; map_reduce_args.cleanup = wc_cleanup; map_reduce_args.map = wordcount_map; map_reduce_args.reduce = wordcount_reduce; map_reduce_args.combiner = wordcount_combiner; map_reduce_args.splitter = wordcount_splitter; map_reduce_args.key_cmp = mystrcmp; map_reduce_args.unit_size = wc_data.unit_size; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &wc_data.wc_vals; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 1024 * 2; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; printf("Wordcount: Calling MapReduce Scheduler Wordcount\n"); CHECK_ERROR(map_reduce (&map_reduce_args) < 0); map_reduce_cleanup(&map_reduce_args); CHECK_ERROR (map_reduce_finalize ()); get_time (&end); return 0; }
/*
 * Tear down after a run.
 *
 * Finalises the MapReduce runtime, closes the file descriptor held in
 * fd->fhandle and, unless the build defines NO_MMAP, unmaps the
 * fd->flen + 1 bytes at fd->f_data.
 *
 * NOTE(review): in NO_MMAP builds nothing releases fd->f_data here; an
 * earlier revision had a commented-out `free (fd);` in that branch.
 * Confirm who owns that buffer.
 */
void unload_op(filedata_t *fd)
{
    /* Shut the runtime down first. */
    CHECK_ERROR (map_reduce_finalize ());

    /* Then release the input file handle. */
    CHECK_ERROR(close(fd->fhandle) < 0);

#ifndef NO_MMAP
    /* Finally drop the mapping of the file contents. */
    CHECK_ERROR(munmap(fd->f_data, fd->flen + 1) < 0);
#endif
}
int main(int argc, char *argv[]) { final_data_t hist_vals; int i; struct timeval begin, end; hist_data_t hist_data; get_time (&begin); // We use this global variable arrays to store the "key" for each histogram // bucket. This is to prevent memory leaks in the mapreduce scheduler for (i = 0; i < 256; i++) { blue_keys[i] = i; green_keys[i] = 1000 + i; red_keys[i] = 2000 + i; } i = map_reduce_init(&argc, &argv); CHECK_ERROR (i < 0); hist_data.fname = argv[1]; printf("Histogram: Running...\n"); // Setup map reduce args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &hist_data; map_reduce_args.task_data_size = sizeof(hist_data_t); map_reduce_args.prep = hist_prep; map_reduce_args.cleanup = hist_cleanup; map_reduce_args.map = hist_map; map_reduce_args.reduce = hist_reduce; map_reduce_args.combiner = hist_combiner; map_reduce_args.splitter = hist_splitter; map_reduce_args.key_cmp = myshortcmp; map_reduce_args.unit_size = 3; // 3 bytes per pixel hist_data.unit_size = 3; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &hist_vals; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; fprintf(stderr, "Histogram: Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR( map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); short pix_val; intptr_t freq; short prev = 0; dprintf("\n\nBlue\n"); 
dprintf("----------\n\n"); for (i = 0; i < hist_vals.length; i++) { keyval_t * curr = &((keyval_t *)hist_vals.data)[i]; pix_val = *((short *)curr->key); freq = (intptr_t)curr->val; if (pix_val - prev > 700) { if (pix_val >= 2000) { dprintf("\n\nRed\n"); dprintf("----------\n\n"); } else if (pix_val >= 1000) { dprintf("\n\nGreen\n"); dprintf("----------\n\n"); } } dprintf("%hd - %" PRIdPTR "\n", pix_val % 1000, freq); prev = pix_val; } map_reduce_cleanup(&map_reduce_args); CHECK_ERROR (map_reduce_finalize ()); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
int main(int argc, char **argv) { final_data_t pca_mean_vals; final_data_t pca_cov_vals; map_reduce_args_t map_reduce_args; int i; struct timeval begin, end; #ifdef TIMING unsigned int library_time = 0; #endif get_time (&begin); parse_args(argc, argv); // Allocate space for the matrix pca_data.matrix = (int *)malloc(sizeof(int) * num_rows * num_cols); //Generate random values for all the points in the matrix generate_points(pca_data.matrix, num_rows, num_cols); // Print the points //dump_points(pca_data.matrix, num_rows, num_cols); /* Create the structure to store the mean value */ pca_data.unit_size = sizeof(int) * num_cols; // size of one row pca_data.next_start_row = pca_data.next_cov_row = 0; pca_data.mean = NULL; CHECK_ERROR (map_reduce_init ()); // Setup scheduler args for computing the mean memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &pca_data; map_reduce_args.map = pca_mean_map; map_reduce_args.reduce = NULL; // use identity reduce map_reduce_args.splitter = pca_mean_splitter; map_reduce_args.locator = pca_mean_locator; map_reduce_args.key_cmp = mymeancmp; map_reduce_args.unit_size = pca_data.unit_size; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &pca_mean_vals; map_reduce_args.data_size = num_rows * num_cols * sizeof(int); map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 1024 * 16; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; printf("PCA Mean: Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR(map_reduce(&map_reduce_args) < 0); get_time (&end); 
#ifdef TIMING library_time += time_diff (&end, &begin); #endif get_time (&begin); printf("PCA Mean: MapReduce Completed\n"); assert (pca_mean_vals.length == num_rows); //dprintf("Mean vector:\n"); pca_data.unit_size = sizeof(int) * num_cols * 2; // size of two rows pca_data.next_start_row = pca_data.next_cov_row = 0; pca_data.mean = pca_mean_vals.data; // array of keys and values - the keys have been freed tho // Setup Scheduler args for computing the covariance memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &pca_data; map_reduce_args.map = pca_cov_map; map_reduce_args.reduce = NULL; // use identity reduce map_reduce_args.splitter = pca_cov_splitter; map_reduce_args.locator = pca_cov_locator; map_reduce_args.key_cmp = mycovcmp; map_reduce_args.unit_size = pca_data.unit_size; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &pca_cov_vals; // data size is number of elements that need to be calculated in a cov matrix // multiplied by the size of two rows for each element map_reduce_args.data_size = ((((num_rows * num_rows) - num_rows)/2) + num_rows) * pca_data.unit_size; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 1024 * 16; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = atoi(GETENV("MR_KEYMATCHFACTOR"));//2; map_reduce_args.use_one_queue_per_task = true; printf("PCA Cov: Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "inter library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR(map_reduce(&map_reduce_args) < 0); get_time (&end); #ifdef TIMING library_time += time_diff (&end, &begin); fprintf (stderr, "library: %u\n", library_time); #endif get_time (&begin); 
CHECK_ERROR (map_reduce_finalize ()); printf("PCA Cov: MapReduce Completed\n"); assert(pca_cov_vals.length == ((((num_rows * num_rows) - num_rows)/2) + num_rows)); // Free the allocated structures int cnt = 0; intptr_t sum = 0; dprintf("\n\nCovariance sum: "); for (i = 0; i <pca_cov_vals.length; i++) { sum += (intptr_t)(pca_cov_vals.data[i].val); //dprintf("%5d ", ); cnt++; if (cnt == num_rows) { //dprintf("\n"); num_rows--; cnt = 0; } free(pca_cov_vals.data[i].key); } dprintf ("%" PRIdPTR "\n", sum); free (pca_cov_vals.data); free (pca_mean_vals.data); free (pca_data.matrix); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
int main(int argc, char **argv) { final_data_t kmeans_vals; map_reduce_args_t map_reduce_args; int i; int *means; bool first_run; struct timeval begin, end; #ifdef TIMING unsigned int library_time = 0; unsigned int inter_library_time = 0; #endif get_time (&begin); parse_args(argc, argv); // get points kmeans_data.points = (int *)malloc(sizeof(int) * num_points * dim); generate_points(kmeans_data.points, num_points); // get means kmeans_data.means = (keyval_t *)malloc(sizeof(keyval_t) * num_means); means = malloc(sizeof(int) * dim * num_means); for (i=0; i<num_means; i++) { kmeans_data.means[i].val = &means[i * dim]; kmeans_data.means[i].key = malloc(sizeof(void *)); } generate_means(kmeans_data.means, num_means); kmeans_data.next_point = 0; kmeans_data.unit_size = sizeof(int) * dim; kmeans_data.clusters = (int *)malloc(sizeof(int) * num_points); memset(kmeans_data.clusters, -1, sizeof(int) * num_points); modified = true; CHECK_ERROR (map_reduce_init ()); // Setup map reduce args memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &kmeans_data; map_reduce_args.map = kmeans_map; map_reduce_args.reduce = kmeans_reduce; map_reduce_args.splitter = kmeans_splitter; map_reduce_args.locator = kmeans_locator; map_reduce_args.key_cmp = mykeycmp; map_reduce_args.unit_size = kmeans_data.unit_size; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &kmeans_vals; map_reduce_args.data_size = (num_points + num_means) * dim * sizeof(int); map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 8; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; map_reduce_args.use_one_queue_per_task = true; printf("KMeans: 
Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif first_run = true; while (modified == true) { modified = false; kmeans_data.next_point = 0; //dprintf("."); get_time (&begin); CHECK_ERROR (map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING library_time += time_diff (&end, &begin); #endif get_time (&begin); for (i = 0; i < kmeans_vals.length; i++) { int mean_idx = *((int *)(kmeans_vals.data[i].key)); if (first_run == false) free(kmeans_data.means[mean_idx].val); kmeans_data.means[mean_idx] = kmeans_vals.data[i]; } if (kmeans_vals.length > 0) free(kmeans_vals.data); get_time (&end); #ifdef TIMING inter_library_time += time_diff (&end, &begin); #endif first_run = false; } #ifdef TIMING fprintf (stderr, "library: %u\n", library_time); fprintf (stderr, "inter library: %u\n", inter_library_time); #endif get_time (&begin); CHECK_ERROR (map_reduce_finalize ()); dprintf("\n"); printf("KMeans: MapReduce Completed\n"); dprintf("\n\nFinal means:\n"); dump_means(kmeans_data.means, num_means); free(kmeans_data.points); for (i = 0; i < num_means; i++) { free(kmeans_data.means[i].key); free(kmeans_data.means[i].val); } free (kmeans_data.means); free (means); free(kmeans_data.clusters); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
int main(int argc, char *argv[]) { final_data_t hist_vals; unsigned long long int i; int fd; char *fdata; struct stat finfo; char * fname; struct timeval begin, end; get_time (&begin); // Make sure a filename is specified if (argv[1] == NULL) { printf("USAGE: %s <bitmap filename>\n", argv[0]); exit(1); } fname = argv[1]; printf("Histogram: Running...\n"); // Read in the file CHECK_ERROR((fd = open(fname, O_RDONLY)) < 0); // Get the file info (for file length) CHECK_ERROR(fstat(fd, &finfo) < 0); #ifndef NO_MMAP // Memory map the file CHECK_ERROR((fdata = mmap(0, finfo.st_size + 1, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)) == NULL); #else int ret; fdata = (char *)malloc (finfo.st_size); CHECK_ERROR (fdata == NULL); ret = read (fd, fdata, finfo.st_size); CHECK_ERROR (ret != finfo.st_size); #endif if ((fdata[0] != 'B') || (fdata[1] != 'M')) { printf("File is not a valid bitmap file. Exiting\n"); exit(1); } test_endianess(); // will set the variable "swap" unsigned short *bitsperpixel = (unsigned short *)(&(fdata[BITS_PER_PIXEL_POS])); if (swap) { swap_bytes((char *)(bitsperpixel), sizeof(*bitsperpixel)); } if (*bitsperpixel != 24) { // ensure its 3 bytes per pixel printf("Error: Invalid bitmap format - "); printf("This application only accepts 24-bit pictures. Exiting\n"); exit(1); } unsigned short *data_pos = (unsigned short *)(&(fdata[IMG_DATA_OFFSET_POS])); if (swap) { swap_bytes((char *)(data_pos), sizeof(*data_pos)); } unsigned long long int imgdata_bytes = finfo.st_size - (*(data_pos)); printf("This file has %llu bytes of image data, %llu pixels\n", imgdata_bytes, imgdata_bytes / 3); // We use this global variable arrays to store the "key" for each histogram // bucket. 
This is to prevent memory leaks in the mapreduce scheduler for (i = 0; i < 256; i++) { blue_keys[i] = i; green_keys[i] = 1000 + i; red_keys[i] = 2000 + i; } CHECK_ERROR (map_reduce_init ()); // Setup map reduce args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &(fdata[*data_pos]); //&hist_data; map_reduce_args.map = hist_map; map_reduce_args.reduce = hist_reduce; map_reduce_args.combiner = hist_combiner; map_reduce_args.splitter = NULL; //hist_splitter; map_reduce_args.key_cmp = myshortcmp; map_reduce_args.unit_size = 3; // 3 bytes per pixel map_reduce_args.partition = NULL; // use default map_reduce_args.result = &hist_vals; map_reduce_args.data_size = imgdata_bytes; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; fprintf(stderr, "Histogram: Calling MapReduce Scheduler\n"); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR( map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR (map_reduce_finalize ()); short pix_val; intptr_t freq; short prev = 0; dprintf("\n\nBlue\n"); dprintf("----------\n\n"); for (i = 0; i < hist_vals.length; i++) { keyval_t * curr = &((keyval_t *)hist_vals.data)[i]; pix_val = *((short *)curr->key); freq = (intptr_t)curr->val; if (pix_val - prev > 700) { if (pix_val >= 2000) { dprintf("\n\nRed\n"); dprintf("----------\n\n"); } else if (pix_val >= 1000) { dprintf("\n\nGreen\n"); 
dprintf("----------\n\n"); } } dprintf("%hd - %" PRIdPTR "\n", pix_val % 1000, freq); prev = pix_val; } free(hist_vals.data); #ifndef NO_MMAP CHECK_ERROR (munmap (fdata, finfo.st_size + 1) < 0); #else free (fdata); #endif CHECK_ERROR (close (fd) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }
int main(int argc, char *argv[]) { final_data_t str_vals; struct timeval begin, end; struct timeval starttime,endtime; str_data_t str_data; get_time (&begin); CHECK_ERROR (map_reduce_init (&argc, &argv)); compute_hashes(key1, key1_final, strlen(key1)); compute_hashes(key2, key2_final, strlen(key2)); compute_hashes(key3, key3_final, strlen(key3)); compute_hashes(key4, key4_final, strlen(key4)); str_data.offset = 0; str_data.fname_keys = argv[1]; printf("String Match: Running...\n"); // Setup scheduler args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &str_data; map_reduce_args.task_data_size = sizeof(str_data_t); map_reduce_args.prep = sm_prep; map_reduce_args.cleanup = sm_cleanup; map_reduce_args.map = string_match_map; map_reduce_args.reduce = sm_reduce; map_reduce_args.splitter = string_match_splitter; map_reduce_args.key_cmp = mystrcmp; map_reduce_args.unit_size = DEFAULT_UNIT_SIZE; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &str_vals; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; printf("String Match: Calling String Match\n"); gettimeofday(&starttime,0); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR (map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); gettimeofday(&endtime,0); printf("\nString Match Results:\n"); int i; for (i = 0; i < str_vals.length; i++) { keyval_t * curr = &((keyval_t 
*)str_vals.data)[i]; dprintf("%15s - %" PRIdPTR "\n", (char *)curr->key, (intptr_t)curr->val); } get_time (&end); map_reduce_cleanup(&map_reduce_args); CHECK_ERROR (map_reduce_finalize ()); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }