Example #1
0
/** string_match()
 *  Sequential scan of the key file: every line is hashed with
 *  compute_hashes() and compared against the hashes of four fixed words;
 *  each match is reported through dprintf().
 *
 *  data_in - must be non-NULL; data_in->keys_file is the start of the
 *            text handed to getnextline().
 */
void string_match(str_data_t *data_in)
{
	assert(data_in);

	enum { NUM_KEYS = 4 };
	char *plain[NUM_KEYS] = { "Helloworld", "howareyou", "ferrari", "whotheman" };
	char *hashed[NUM_KEYS];
	int k;

	/* One output buffer per key, sized like the key itself plus a NUL. */
	for (k = 0; k < NUM_KEYS; k++)
		hashed[k] = malloc(strlen(plain[k]) + 1);

	for (k = 0; k < NUM_KEYS; k++)
		compute_hashes(plain[k], hashed[k]);

	char *cursor = data_in->keys_file;
	char *line = malloc(MAX_REC_LEN);
	char *line_hash = malloc(MAX_REC_LEN);
	memset(line, 0, MAX_REC_LEN);
	memset(line_hash, 0, MAX_REC_LEN);

	for (;;) {
		/* A negative return from getnextline() ends the scan. */
		int consumed = getnextline(line, MAX_REC_LEN, cursor);
		if (consumed < 0)
			break;

		compute_hashes(line, line_hash);

		for (k = 0; k < NUM_KEYS; k++) {
			if (strcmp(hashed[k], line_hash) == 0)
				dprintf("FOUND: WORD IS %s\n", line);
		}

		/* Advance past what was consumed and clear both scratch buffers. */
		cursor += consumed;
		memset(line, 0, MAX_REC_LEN);
		memset(line_hash, 0, MAX_REC_LEN);
	}

	free(line);
	free(line_hash);
	for (k = 0; k < NUM_KEYS; k++)
		free(hashed[k]);
}
Example #2
0
    /**
     * Map step: walks data.keys one line at a time, hashes each line with
     * compute_hashes(), and reports a hit through dprintf() when the hash
     * equals one of the four precomputed key hashes.
     *
     * data - chunk to scan; data.keys holds data.keys_len bytes of words
     *        separated by '\r' and/or '\n'.
     * out  - output container; this function does not write to it.
     */
    void map(data_type& data, map_container& out) const
    {
        char cur_word_final[MAX_REC_LEN];

        int index = 0;
        while(index < data.keys_len)
        {
            // Measure the current word: everything up to the next line
            // terminator or the end of the buffer.
            char* key = data.keys + index;
            int len = 0;
            while(index+len < data.keys_len && data.keys[index+len] != '\r' && data.keys[index+len] != '\n')
                len++;

            compute_hashes(key, len, cur_word_final);

            // The reports must be guarded by the comparison; a stray ';'
            // after each if() previously made every report unconditional.
            if(!strcmp(key1_final, cur_word_final)) {
                dprintf("FOUND: WORD IS %s\n", key1);
            }

            if(!strcmp(key2_final, cur_word_final)) {
                dprintf("FOUND: WORD IS %s\n", key2);
            }

            if(!strcmp(key3_final, cur_word_final)) {
                dprintf("FOUND: WORD IS %s\n", key3);
            }

            if(!strcmp(key4_final, cur_word_final)) {
                dprintf("FOUND: WORD IS %s\n", key4);
            }

            // Step over the word, then over any run of line terminators.
            index += len;
            while(index < data.keys_len && (data.keys[index] == '\r' || data.keys[index] == '\n'))
                index++;
        }
    }
/** string_match_map()
 *  Map Function that checks the hash of each word to the given hashes
 *
 *  Splits the args->length bytes at args->data into words separated by
 *  '\r' / '\n' (leading '\0' bytes are skipped as well), NUL-terminates
 *  each word in place, hashes it, and calls emit_intermediate() once for
 *  every key hash the word's hash equals.
 */
void string_match_map(map_args_t *args)
{
    assert(args);

    size_t key_len, total_len = 0;
    char *key_file = args->data;
    char *cur_word;

    char cur_word_final[MAX_REC_LEN];

    while (total_len < args->length) {
        /* Skip separators. The bounds test comes first so the byte just
         * past the chunk is never read. */
        for (;
             total_len < args->length &&
             (*key_file == '\0' || *key_file == '\r' || *key_file == '\n');
             key_file += 1, total_len += 1);

        if (total_len == args->length) break;

        /* Collect one word; again test the bound before dereferencing. */
        for (cur_word = key_file, key_len = 0;
             total_len < args->length && *key_file != '\r' && *key_file != '\n';
             key_file += 1, total_len += 1, key_len += 1);

        /* Terminate the word in place.
         * NOTE(review): when the word runs to the end of the chunk this
         * writes the byte at args->data + args->length; presumably the
         * caller's buffer is writable there - confirm. */
        *key_file = 0;
        /* key_len is unsigned, so "empty word" is the only low-side error. */
        CHECK_ERROR(key_len == 0);
        CHECK_ERROR(key_len > MAX_REC_LEN);

        memset(cur_word_final, 0, MAX_REC_LEN);
        compute_hashes(cur_word, cur_word_final, key_len);

        if (!strcmp(key1_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key2_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key3_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }

        if (!strcmp(key4_final, cur_word_final)) {
            emit_intermediate(cur_word, (void *)1, key_len);
        }
    }
}
Example #4
0
/*
 * Finish an I/O operation that was posted asynchronously earlier.
 *
 * _uptr is the address of the operation's struct user_ptr, passed as a
 * long. The function hands back that structure's `completed` array,
 * whose integers indicate error codes for failed I/O or the amount of
 * I/O completed. The caller is responsible for freeing it.
 */
static int* hash_buffered_read_complete(long _uptr)
{
		struct user_ptr *const request = (struct user_ptr *) _uptr;
		/* Grab the result array first: the request is released below. */
		int *const results = request->completed;

		/*
		 * Hash the file contents locally for now; this is meant to
		 * turn into an RPC call at some point.
		 */
		compute_hashes(request);

		/* Release the user pointer itself */
		dealloc_user_ptr(request);

		return results;
}
/** string_match_map()
 *  Map Function that checks the hash of each word to the given hashes
 */
void *string_match_map(void *args)
{
    assert(args);
    
    str_map_data_t* data_in = (str_map_data_t*)( ((map_args_t*)args)->data);

	int key_len, total_len = 0;
	char * key_file = data_in->keys_file;
	char * cur_word = (char*)malloc(MAX_REC_LEN);
	char * cur_word_final = (char*)malloc(MAX_REC_LEN);
	bzero(cur_word, MAX_REC_LEN);
	bzero(cur_word_final, MAX_REC_LEN);

	while( (total_len < ((map_args_t*)args)->length) && ((key_len = getnextline(cur_word, MAX_REC_LEN, key_file)) >= 0))
     {
		compute_hashes(cur_word, cur_word_final);

	    if(!strcmp(key1_final, cur_word_final))
		    dprintf("FOUND: WORD IS %s\n", cur_word);

	    if(!strcmp(key2_final, cur_word_final))
		    dprintf("FOUND: WORD IS %s\n", cur_word);

	    if(!strcmp(key3_final, cur_word_final))
		    dprintf("FOUND: WORD IS %s\n", cur_word);

	    if(!strcmp(key4_final, cur_word_final))
		    dprintf("FOUND: WORD IS %s\n", cur_word);

		key_file = key_file + key_len;
		bzero(cur_word,MAX_REC_LEN);
		bzero(cur_word_final, MAX_REC_LEN);
		total_len+=key_len;
		
		COZ_PROGRESS;
    }
    free(cur_word);
    free(cur_word_final); 
    return (void *)0;
}
Example #6
0
 // Builds the traits from a target capacity and error rate: `cells` is
 // computed from both arguments, and `hashes` is then computed from the
 // capacity and that cell count.
 // NOTE(review): the `hashes` initializer reads the `cells` member, so
 // `cells` must be declared before `hashes` in the class - confirm against
 // the member declarations.
 explicit traits(size_t capacity, double error_rate)
     : cells(compute_cells(capacity, error_rate)),
       hashes(compute_hashes(capacity, cells)) {}
/** string_match_splitter()
 *  Splitter Function to assign portions of the file to each thread
 *
 *  Hashes the four global keys, cuts the key file into n slices that
 *  each end on a line boundary, runs string_match_map() on every slice
 *  in its own thread, and waits for all of them.
 *
 *  data_in - a str_data_t*; keys_file, keys_file_len, encrypt_file and
 *            bytes_comp are read, and bytes_comp is advanced past each
 *            slice that is handed out.
 *  n       - number of threads to start; must be positive.
 */
void string_match_splitter(void *data_in,int n)
{
    /* Check whether the various terms exist */
    assert(data_in);
    str_data_t * data = (str_data_t *)data_in;

    key1_final = malloc(strlen(key1) + 1);
    key2_final = malloc(strlen(key2) + 1);
    key3_final = malloc(strlen(key3) + 1);
    key4_final = malloc(strlen(key4) + 1);
    CHECK_ERROR(key1_final == NULL);
    CHECK_ERROR(key2_final == NULL);
    CHECK_ERROR(key3_final == NULL);
    CHECK_ERROR(key4_final == NULL);

    compute_hashes(key1, key1_final);
    compute_hashes(key2, key2_final);
    compute_hashes(key3, key3_final);
    compute_hashes(key4, key4_final);

    pthread_attr_t attr;
    pthread_t * tid;
    int i;
    int num_procs = n;

    CHECK_ERROR(num_procs <= 0);
    printf("THe number of processors is %d\n", num_procs);

    tid = (pthread_t *)MALLOC(num_procs * sizeof(pthread_t));
    CHECK_ERROR(tid == NULL);

    /* Thread must be scheduled systemwide */
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

    int req_bytes = data->keys_file_len / num_procs;

    str_map_data_t *map_data = (str_map_data_t*)malloc(sizeof(str_map_data_t)
                               * num_procs);
    CHECK_ERROR(map_data == NULL);
    map_args_t* out = (map_args_t*)malloc(sizeof(map_args_t) * num_procs);
    CHECK_ERROR(out == NULL);

    for(i=0; i<num_procs; i++)
    {
        map_data[i].encrypt_file = data->encrypt_file;
        map_data[i].keys_file = data->keys_file + data->bytes_comp;
        map_data[i].TID = i;

        /* Assign the required number of bytes */
        int available_bytes = data->keys_file_len - data->bytes_comp;
        if(available_bytes < 0)
            available_bytes = 0;

        out[i].length = (req_bytes < available_bytes)? req_bytes:available_bytes;
        out[i].data = &(map_data[i]);


        char* final_ptr = map_data[i].keys_file + out[i].length;
        int counter = data->bytes_comp + out[i].length;

        /* make sure we end at a word; the bound is tested before the
         * byte is read so nothing at or past keys_file_len is touched */
        while(counter < data->keys_file_len && *final_ptr != '\n'
                && *final_ptr != '\r' && *final_ptr != '\0')
        {
            counter++;
            final_ptr++;
        }
        /* Swallow the terminator too ('\r' is counted as two bytes),
         * but only when final_ptr still points inside the file. */
        if(counter < data->keys_file_len)
        {
            if(*final_ptr == '\r')
                counter+=2;
            else if(*final_ptr == '\n')
                counter++;
        }

        out[i].length = counter - data->bytes_comp;
        data->bytes_comp = counter;
        CHECK_ERROR(pthread_create(&tid[i], &attr, string_match_map,
                                   (void*)(&(out[i]))) != 0);
    }

    /* Barrier, wait for all threads to finish */
    for (i = 0; i < num_procs; i++)
    {
        /* pthread_join stores a full void*; receiving it in an int
         * would write past the variable on LP64 targets. */
        void *ret_val = NULL;
        CHECK_ERROR(pthread_join(tid[i], &ret_val) != 0);
        CHECK_ERROR(ret_val != NULL);
    }
    pthread_attr_destroy(&attr);
    free(tid);
    free(key1_final);
    free(key2_final);
    free(key3_final);
    free(key4_final);
    free(out);
    free(map_data);
}
Example #8
0
/*
 * Entry point of the MatchMR string-match run.
 *
 * Loads the keys file named by argv[1] (memory-mapped, or read into a
 * heap buffer when NO_MMAP is defined), hashes the four global keys,
 * runs the MatchMR job over the file contents, and prints the time
 * spent in the "initialize", "library" and "finalize" phases.
 * Returns 0 on completion; exits with status 1 when no file name is given.
 */
int main(int argc, char *argv[]) {
    
    int fd_keys;
    char *fdata_keys;
    struct stat finfo_keys;
    char *fname_keys;

    struct timespec begin, end;

    get_time (begin);

    if (argv[1] == NULL)
    {
        printf("USAGE: %s <keys filename>\n", argv[0]);
        exit(1);
    }
    fname_keys = argv[1];

    printf("String Match: Running...\n");

    // Read in the file
    CHECK_ERROR((fd_keys = open(fname_keys,O_RDONLY)) < 0);
    // Get the file info (for file length)
    CHECK_ERROR(fstat(fd_keys, &finfo_keys) < 0);
#ifndef NO_MMAP
#ifdef MMAP_POPULATE
    // Memory map the file; mmap reports failure as MAP_FAILED, not NULL
    CHECK_ERROR((fdata_keys = (char*)mmap(0, finfo_keys.st_size + 1, 
        PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd_keys, 0)) == (char*)MAP_FAILED);
#else
    // Memory map the file; mmap reports failure as MAP_FAILED, not NULL
    CHECK_ERROR((fdata_keys = (char*)mmap(0, finfo_keys.st_size + 1, 
        PROT_READ, MAP_PRIVATE, fd_keys, 0)) == (char*)MAP_FAILED);
#endif
#else
    // read() returns ssize_t; an int would truncate large byte counts
    ssize_t ret;

    fdata_keys = (char *)malloc (finfo_keys.st_size);
    CHECK_ERROR (fdata_keys == NULL);

    ret = read (fd_keys, fdata_keys, finfo_keys.st_size);
    CHECK_ERROR (ret != finfo_keys.st_size);
#endif

    key1_final = (char*)malloc(strlen(key1)+1);
    key2_final = (char*)malloc(strlen(key2)+1);
    key3_final = (char*)malloc(strlen(key3)+1);
    key4_final = (char*)malloc(strlen(key4)+1);
    CHECK_ERROR (key1_final == NULL);
    CHECK_ERROR (key2_final == NULL);
    CHECK_ERROR (key3_final == NULL);
    CHECK_ERROR (key4_final == NULL);

    compute_hashes(key1, strlen(key1), key1_final);
    compute_hashes(key2, strlen(key2), key2_final);
    compute_hashes(key3, strlen(key3), key3_final);
    compute_hashes(key4, strlen(key4), key4_final);
    
    get_time (end);

    print_time("initialize", begin, end);

    printf("String Match: Calling String Match\n");

    get_time (begin);
    MatchMR mr(fdata_keys, finfo_keys.st_size, NULL, 0, 64*1024);
    std::vector<MatchMR::keyval> out;
    CHECK_ERROR (mr.run(out) < 0);
    get_time (end);

    print_time("library", begin, end);

    get_time (begin);

    free(key1_final);
    free(key2_final);
    free(key3_final);
    free(key4_final);

#ifndef NO_MMAP
    CHECK_ERROR(munmap(fdata_keys, finfo_keys.st_size + 1) < 0);
#else
    free (fdata_keys);
#endif
    CHECK_ERROR(close(fd_keys) < 0);

    get_time (end);

    print_time("finalize", begin, end);

    return 0;
}
int main(int argc, char *argv[]) {
    final_data_t str_vals;
    struct timeval begin, end;
    struct timeval starttime,endtime;
    str_data_t str_data;

    get_time (&begin);

    CHECK_ERROR (map_reduce_init (&argc, &argv));

	compute_hashes(key1, key1_final, strlen(key1));
	compute_hashes(key2, key2_final, strlen(key2));
	compute_hashes(key3, key3_final, strlen(key3));
	compute_hashes(key4, key4_final, strlen(key4));

    str_data.offset = 0;
    str_data.fname_keys = argv[1];

    printf("String Match: Running...\n");

    // Setup scheduler args
    map_reduce_args_t map_reduce_args;
    memset(&map_reduce_args, 0, sizeof(map_reduce_args_t));
    map_reduce_args.task_data = &str_data;
	map_reduce_args.task_data_size = sizeof(str_data_t);
	
	map_reduce_args.prep = sm_prep;
	map_reduce_args.cleanup = sm_cleanup;
    map_reduce_args.map = string_match_map;
    map_reduce_args.reduce = sm_reduce;
    map_reduce_args.splitter = string_match_splitter;
    map_reduce_args.key_cmp = mystrcmp;
	
    map_reduce_args.unit_size = DEFAULT_UNIT_SIZE;
    map_reduce_args.partition = NULL; // use default
    map_reduce_args.result = &str_vals;
	
    map_reduce_args.L1_cache_size = atoi(GETENV("MR_L1CACHESIZE"));//1024 * 512;
    map_reduce_args.num_map_threads = atoi(GETENV("MR_NUMTHREADS"));//8;
    map_reduce_args.num_reduce_threads = atoi(GETENV("MR_NUMTHREADS"));//16;
    map_reduce_args.num_merge_threads = atoi(GETENV("MR_NUMTHREADS"));//8;
    map_reduce_args.num_procs = atoi(GETENV("MR_NUMPROCS"));//16;
    map_reduce_args.key_match_factor = (float)atof(GETENV("MR_KEYMATCHFACTOR"));//2;

    printf("String Match: Calling String Match\n");

    gettimeofday(&starttime,0);

    get_time (&end);

#ifdef TIMING
    fprintf (stderr, "initialize: %u\n", time_diff (&end, &begin));
#endif

    get_time (&begin);
    CHECK_ERROR (map_reduce (&map_reduce_args) < 0);
    get_time (&end);

#ifdef TIMING
    fprintf (stderr, "library: %u\n", time_diff (&end, &begin));
#endif

    get_time (&begin);

    gettimeofday(&endtime,0);

    printf("\nString Match Results:\n");
	int i;
    for (i = 0; i < str_vals.length; i++) {
		keyval_t * curr = &((keyval_t *)str_vals.data)[i];
		dprintf("%15s - %" PRIdPTR "\n", (char *)curr->key, (intptr_t)curr->val);
    }

    get_time (&end);

	map_reduce_cleanup(&map_reduce_args);
    CHECK_ERROR (map_reduce_finalize ());

#ifdef TIMING
    fprintf (stderr, "finalize: %u\n", time_diff (&end, &begin));
#endif

    return 0;
}