/** string_match()
 *  Sequential scan: hashes four fixed search words, then walks the keys
 *  buffer in data_in->keys_file record by record (via getnextline), hashes
 *  each record and reports every record whose hash equals one of the four
 *  reference hashes.
 *
 *  data_in must be non-NULL (asserted).
 */
void string_match(str_data_t *data_in)
{
    enum { NUM_SEARCH_KEYS = 4 };

    char *search_keys[NUM_SEARCH_KEYS] = {
        "Helloworld", "howareyou", "ferrari", "whotheman"
    };
    char *search_hashes[NUM_SEARCH_KEYS];
    char *cursor;
    char *word;
    char *word_hash;
    int consumed;
    int k;

    assert(data_in);

    /* One output buffer per search word, same size as the word itself. */
    for (k = 0; k < NUM_SEARCH_KEYS; k++) {
        search_hashes[k] = malloc(strlen(search_keys[k]) + 1);
    }
    //printf("%d %d\n", strlen(key1), strlen(key2));
    for (k = 0; k < NUM_SEARCH_KEYS; k++) {
        compute_hashes(search_keys[k], search_hashes[k]);
    }

    cursor = data_in->keys_file;

    word = malloc(MAX_REC_LEN);
    word_hash = malloc(MAX_REC_LEN);
    bzero(word, MAX_REC_LEN);
    bzero(word_hash, MAX_REC_LEN);

    for (;;) {
        consumed = getnextline(word, MAX_REC_LEN, cursor);
        if (consumed < 0) {
            break;
        }

        compute_hashes(word, word_hash);

        /* Compare against each reference hash in the fixed key order. */
        for (k = 0; k < NUM_SEARCH_KEYS; k++) {
            if (strcmp(search_hashes[k], word_hash) == 0) {
                dprintf("FOUND: WORD IS %s\n", word);
            }
        }

        /* Advance past the record just read and reset both scratch buffers. */
        cursor = cursor + consumed;
        bzero(word, MAX_REC_LEN);
        bzero(word_hash, MAX_REC_LEN);
    }

    free(word);
    free(word_hash);
    for (k = 0; k < NUM_SEARCH_KEYS; k++) {
        free(search_hashes[k]);
    }
}
/**
 * Map task: walks the key buffer in `data`, splitting it into records
 * delimited by '\r' / '\n', hashes each record with compute_hashes() and
 * compares the result against the four precomputed reference hashes
 * (key1_final .. key4_final).
 *
 * On a match the corresponding plain-text search word (key1 .. key4) is
 * reported through dprintf; the record itself is not NUL-terminated inside
 * the buffer, so it is not printed directly. `out` is not written here.
 *
 * Fix: the original `if (...);` statements ended in a stray semicolon, so
 * every dprintf ran unconditionally for every record.
 */
void map(data_type& data, map_container& out) const
{
    char cur_word_final[MAX_REC_LEN];
    int index = 0;

    while (index < data.keys_len) {
        char* key = data.keys + index;

        /* Measure the current record up to the next line terminator. */
        int len = 0;
        while (index + len < data.keys_len &&
               data.keys[index + len] != '\r' &&
               data.keys[index + len] != '\n') {
            len++;
        }

        compute_hashes(key, len, cur_word_final);

        if (!strcmp(key1_final, cur_word_final)) {
            dprintf("FOUND: WORD IS %s\n", key1);
        }
        if (!strcmp(key2_final, cur_word_final)) {
            dprintf("FOUND: WORD IS %s\n", key2);
        }
        if (!strcmp(key3_final, cur_word_final)) {
            dprintf("FOUND: WORD IS %s\n", key3);
        }
        if (!strcmp(key4_final, cur_word_final)) {
            dprintf("FOUND: WORD IS %s\n", key4);
        }

        /* Step over the record, then over any run of line terminators. */
        index += len;
        while (index < data.keys_len &&
               (data.keys[index] == '\r' || data.keys[index] == '\n')) {
            index++;
        }
    }
}
/** string_match_map()
 *  Map function: splits the chunk described by args (args->data,
 *  args->length bytes) into words separated by '\r', '\n' or '\0', hashes
 *  each word and emits the word via emit_intermediate() for every reference
 *  hash (key1_final .. key4_final) it matches.
 *
 *  The chunk is modified in place: the byte following each word is
 *  overwritten with '\0' so the word can be used as a C string.
 *
 *  Fix: both scan loops now test the remaining length BEFORE dereferencing
 *  key_file; the original read *key_file first and therefore touched one
 *  byte beyond the chunk whenever the scan reached its end.
 */
void string_match_map(map_args_t *args)
{
    assert(args);

    size_t key_len, total_len = 0;
    char *key_file = args->data;
    char *cur_word;
    char cur_word_final[MAX_REC_LEN];

    while (total_len < args->length) {
        /* Skip leading separators (bounds check first). */
        while (total_len < args->length &&
               (*key_file == '\0' || *key_file == '\r' || *key_file == '\n')) {
            key_file += 1;
            total_len += 1;
        }

        if (total_len == args->length)
            break;

        /* Collect one word (bounds check first). */
        cur_word = key_file;
        key_len = 0;
        while (total_len < args->length &&
               *key_file != '\r' && *key_file != '\n') {
            key_file += 1;
            total_len += 1;
            key_len += 1;
        }

        /* NOTE(review): when the word runs to the very end of the chunk this
         * store lands on the byte just past args->length; it presumably relies
         * on the buffer being writable and at least one byte larger than the
         * chunk - confirm against the splitter / mapping code. */
        *key_file = 0;

        CHECK_ERROR(key_len <= 0);
        CHECK_ERROR(key_len > MAX_REC_LEN);

        memset(cur_word_final, 0, MAX_REC_LEN);
        compute_hashes(cur_word, cur_word_final, key_len);

        if (!strcmp(key1_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
        if (!strcmp(key2_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
        if (!strcmp(key3_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
        if (!strcmp(key4_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
    }
}
/*
 * Finish an I/O operation that was posted asynchronously earlier.
 *
 * _uptr is the address of the struct user_ptr describing the request,
 * passed as a long. The request's `completed` array (per the original
 * contract: error codes for failed I/O, or the amount of I/O completed)
 * is captured first, the file hashes are computed, and the request
 * descriptor is then released.
 *
 * Returns the captured `completed` pointer; the caller is responsible
 * for freeing it.
 */
static int* hash_buffered_read_complete(long _uptr)
{
	struct user_ptr *const request = (struct user_ptr *) _uptr;

	/* Grab the result array before the descriptor is torn down. */
	int *const results = request->completed;

	/*
	 * Hashing is done locally for now; the original author notes this
	 * is expected to become an RPC call at some point.
	 */
	compute_hashes(request);

	/* Release the request descriptor itself (not the result array). */
	dealloc_user_ptr(request);

	return results;
}
/** string_match_map() * Map Function that checks the hash of each word to the given hashes */ void *string_match_map(void *args) { assert(args); str_map_data_t* data_in = (str_map_data_t*)( ((map_args_t*)args)->data); int key_len, total_len = 0; char * key_file = data_in->keys_file; char * cur_word = (char*)malloc(MAX_REC_LEN); char * cur_word_final = (char*)malloc(MAX_REC_LEN); bzero(cur_word, MAX_REC_LEN); bzero(cur_word_final, MAX_REC_LEN); while( (total_len < ((map_args_t*)args)->length) && ((key_len = getnextline(cur_word, MAX_REC_LEN, key_file)) >= 0)) { compute_hashes(cur_word, cur_word_final); if(!strcmp(key1_final, cur_word_final)) dprintf("FOUND: WORD IS %s\n", cur_word); if(!strcmp(key2_final, cur_word_final)) dprintf("FOUND: WORD IS %s\n", cur_word); if(!strcmp(key3_final, cur_word_final)) dprintf("FOUND: WORD IS %s\n", cur_word); if(!strcmp(key4_final, cur_word_final)) dprintf("FOUND: WORD IS %s\n", cur_word); key_file = key_file + key_len; bzero(cur_word,MAX_REC_LEN); bzero(cur_word_final, MAX_REC_LEN); total_len+=key_len; COZ_PROGRESS; } free(cur_word); free(cur_word_final); return (void *)0; }
/**
 * Builds the traits from a capacity and an error rate:
 *   - cells  is initialised from compute_cells(capacity, error_rate);
 *   - hashes is initialised from compute_hashes(capacity, cells).
 *
 * NOTE(review): the initializer for `hashes` reads `cells`. C++ initialises
 * members in declaration order, not initializer-list order, so this is only
 * correct if `cells` is declared before `hashes` in the class - confirm.
 */
explicit traits(size_t capacity, double error_rate) : cells(compute_cells(capacity, error_rate)), hashes(compute_hashes(capacity, cells)) {}
/** string_match_splitter() * Splitter Function to assign portions of the file to each thread */ void string_match_splitter(void *data_in,int n) { key1_final = malloc(strlen(key1) + 1); key2_final = malloc(strlen(key2) + 1); key3_final = malloc(strlen(key3) + 1); key4_final = malloc(strlen(key4) + 1); compute_hashes(key1, key1_final); compute_hashes(key2, key2_final); compute_hashes(key3, key3_final); compute_hashes(key4, key4_final); pthread_attr_t attr; pthread_t * tid; int i; int num_procs = n; CHECK_ERROR(num_procs <= 0); printf("THe number of processors is %d\n", num_procs); str_data_t * data = (str_data_t *)data_in; /* Check whether the various terms exist */ assert(data_in); tid = (pthread_t *)MALLOC(num_procs * sizeof(pthread_t)); /* Thread must be scheduled systemwide */ pthread_attr_init(&attr); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); int req_bytes = data->keys_file_len / num_procs; str_map_data_t *map_data = (str_map_data_t*)malloc(sizeof(str_map_data_t) * num_procs); map_args_t* out = (map_args_t*)malloc(sizeof(map_args_t) * num_procs); for(i=0; i<num_procs; i++) { map_data[i].encrypt_file = data->encrypt_file; map_data[i].keys_file = data->keys_file + data->bytes_comp; map_data[i].TID = i; /* Assign the required number of bytes */ int available_bytes = data->keys_file_len - data->bytes_comp; if(available_bytes < 0) available_bytes = 0; out[i].length = (req_bytes < available_bytes)? 
req_bytes:available_bytes; out[i].data = &(map_data[i]); char* final_ptr = map_data[i].keys_file + out[i].length; int counter = data->bytes_comp + out[i].length; /* make sure we end at a word */ while(counter <= data->keys_file_len && *final_ptr != '\n' && *final_ptr != '\r' && *final_ptr != '\0') { counter++; final_ptr++; } if(*final_ptr == '\r') counter+=2; else if(*final_ptr == '\n') counter++; out[i].length = counter - data->bytes_comp; data->bytes_comp = counter; CHECK_ERROR(pthread_create(&tid[i], &attr, string_match_map, (void*)(&(out[i]))) != 0); } /* Barrier, wait for all threads to finish */ for (i = 0; i < num_procs; i++) { int ret_val; CHECK_ERROR(pthread_join(tid[i], (void **)(void*)&ret_val) != 0); CHECK_ERROR(ret_val != 0); } pthread_attr_destroy(&attr); free(tid); free(key1_final); free(key2_final); free(key3_final); free(key4_final); free(out); free(map_data); }
int main(int argc, char *argv[]) { int fd_keys; char *fdata_keys; struct stat finfo_keys; char *fname_keys; struct timespec begin, end; get_time (begin); if (argv[1] == NULL) { printf("USAGE: %s <keys filename>\n", argv[0]); exit(1); } fname_keys = argv[1]; printf("String Match: Running...\n"); // Read in the file CHECK_ERROR((fd_keys = open(fname_keys,O_RDONLY)) < 0); // Get the file info (for file length) CHECK_ERROR(fstat(fd_keys, &finfo_keys) < 0); #ifndef NO_MMAP #ifdef MMAP_POPULATE // Memory map the file CHECK_ERROR((fdata_keys = (char*)mmap(0, finfo_keys.st_size + 1, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd_keys, 0)) == NULL); #else // Memory map the file CHECK_ERROR((fdata_keys = (char*)mmap(0, finfo_keys.st_size + 1, PROT_READ, MAP_PRIVATE, fd_keys, 0)) == NULL); #endif #else int ret; fdata_keys = (char *)malloc (finfo_keys.st_size); CHECK_ERROR (fdata_keys == NULL); ret = read (fd_keys, fdata_keys, finfo_keys.st_size); CHECK_ERROR (ret != finfo_keys.st_size); #endif key1_final = (char*)malloc(strlen(key1)+1); key2_final = (char*)malloc(strlen(key2)+1); key3_final = (char*)malloc(strlen(key3)+1); key4_final = (char*)malloc(strlen(key4)+1); compute_hashes(key1, strlen(key1), key1_final); compute_hashes(key2, strlen(key2), key2_final); compute_hashes(key3, strlen(key3), key3_final); compute_hashes(key4, strlen(key4), key4_final); get_time (end); print_time("initialize", begin, end); printf("String Match: Calling String Match\n"); get_time (begin); MatchMR mr(fdata_keys, finfo_keys.st_size, NULL, 0, 64*1024); std::vector<MatchMR::keyval> out; CHECK_ERROR (mr.run(out) < 0); get_time (end); print_time("library", begin, end); get_time (begin); free(key1_final); free(key2_final); free(key3_final); free(key4_final); #ifndef NO_MMAP CHECK_ERROR(munmap(fdata_keys, finfo_keys.st_size + 1) < 0); #else free (fdata_keys); #endif CHECK_ERROR(close(fd_keys) < 0); get_time (end); print_time("finalize", begin, end); return 0; }
int main(int argc, char *argv[]) { final_data_t str_vals; struct timeval begin, end; struct timeval starttime,endtime; str_data_t str_data; get_time (&begin); CHECK_ERROR (map_reduce_init (&argc, &argv)); compute_hashes(key1, key1_final, strlen(key1)); compute_hashes(key2, key2_final, strlen(key2)); compute_hashes(key3, key3_final, strlen(key3)); compute_hashes(key4, key4_final, strlen(key4)); str_data.offset = 0; str_data.fname_keys = argv[1]; printf("String Match: Running...\n"); // Setup scheduler args map_reduce_args_t map_reduce_args; memset(&map_reduce_args, 0, sizeof(map_reduce_args_t)); map_reduce_args.task_data = &str_data; map_reduce_args.task_data_size = sizeof(str_data_t); map_reduce_args.prep = sm_prep; map_reduce_args.cleanup = sm_cleanup; map_reduce_args.map = string_match_map; map_reduce_args.reduce = sm_reduce; map_reduce_args.splitter = string_match_splitter; map_reduce_args.key_cmp = mystrcmp; map_reduce_args.unit_size = DEFAULT_UNIT_SIZE; map_reduce_args.partition = NULL; // use default map_reduce_args.result = &str_vals; map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512; map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16; map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8; map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16; map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2; printf("String Match: Calling String Match\n"); gettimeofday(&starttime,0); get_time (&end); #ifdef TIMING fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); CHECK_ERROR (map_reduce (&map_reduce_args) < 0); get_time (&end); #ifdef TIMING fprintf (stderr, "library: %u\n", time_diff (&end, &begin)); #endif get_time (&begin); gettimeofday(&endtime,0); printf("\nString Match Results:\n"); int i; for (i = 0; i < str_vals.length; i++) { keyval_t * curr = &((keyval_t 
*)str_vals.data)[i]; dprintf("%15s - %" PRIdPTR "\n", (char *)curr->key, (intptr_t)curr->val); } get_time (&end); map_reduce_cleanup(&map_reduce_args); CHECK_ERROR (map_reduce_finalize ()); #ifdef TIMING fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin)); #endif return 0; }