Beispiel #1
0
/*
 * Release an FCH hash function handle.
 *
 * Destroys both hash states (h1, h2), frees the g table, then frees the
 * algorithm data record and the handle itself.
 */
void fch_destroy(cmph_t *mphf)
{
	fch_data_t *fch = (fch_data_t *)mphf->data;

	/* Members first, then the records that hold them. */
	hash_state_destroy(fch->h1);
	hash_state_destroy(fch->h2);
	free(fch->g);

	free(fch);
	free(mphf);
}
Beispiel #2
0
/*
 * Release a BMZ8 hash function handle.
 *
 * Frees the g table, destroys the first two entries of the hash-state
 * array, then frees the array, the data record and the handle.
 */
void bmz8_destroy(cmph_t *mphf)
{
	bmz8_data_t *bmz8f = (bmz8_data_t *)mphf->data;
	int slot;

	free(bmz8f->g);
	for (slot = 0; slot < 2; ++slot)
	{
		hash_state_destroy(bmz8f->hashes[slot]);
	}
	free(bmz8f->hashes);

	free(bmz8f);
	free(mphf);
}
Beispiel #3
0
/*
 * FCH mapping step.
 *
 * Replaces fch->h1 with a newly created hash state, recomputes b, p1 and p2
 * from c and m, and distributes the m keys read from mph->key_source into a
 * freshly created bucket set.
 *
 * Returns the bucket set created by fch_buckets_new().
 */
static fch_buckets_t * mapping(cmph_config_t *mph)
{
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;
	fch_buckets_t *filled = NULL;
	cmph_uint32 remaining;

	if (fch->h1) hash_state_destroy(fch->h1);
	fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);
	fch->b = fch_calc_b(fch->c, fch->m);
	fch->p1 = fch_calc_p1(fch->m);
	fch->p2 = fch_calc_p2(fch->b);
	filled = fch_buckets_new(fch->b);

	mph->key_source->rewind(mph->key_source->data);
	for (remaining = fch->m; remaining > 0; remaining--)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint32 slot;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		slot = hash(fch->h1, key, keylen) % fch->m;
		slot = mixh10h11h12(fch->b, fch->p1, fch->p2, slot);
		/* The key buffer is not released here: it is handed over to the bucket. */
		fch_buckets_insert(filled, slot, key, keylen);
	}
	return filled;
}
Beispiel #4
0
/*
 * Release a CHD_PH hash function handle.
 *
 * Destroys the hash state, tears down and frees the compressed sequence,
 * then frees the data record and the handle.
 */
void chd_ph_destroy(cmph_t *mphf)
{
	chd_ph_data_t *chd = (chd_ph_data_t *)mphf->data;

	hash_state_destroy(chd->hl);

	/* Release the sequence's contents before the sequence object itself. */
	compressed_seq_destroy(chd->cs);
	free(chd->cs);

	free(chd);
	free(mphf);
}
/*
 * Release a BRZ hash function handle.
 *
 * When the g array is present, the k per-bucket g tables and h1/h2 hash
 * states are released together with the three arrays holding them. The h0
 * hash state, the size and offset tables, the data record and the handle
 * are always released.
 */
void brz_destroy(cmph_t *mphf)
{
	brz_data_t *brz = (brz_data_t *)mphf->data;

	if (brz->g != NULL)
	{
		cmph_uint32 bucket = 0;
		while (bucket < brz->k)
		{
			free(brz->g[bucket]);
			hash_state_destroy(brz->h1[bucket]);
			hash_state_destroy(brz->h2[bucket]);
			++bucket;
		}
		free(brz->h2);
		free(brz->h1);
		free(brz->g);
	}

	hash_state_destroy(brz->h0);
	free(brz->offset);
	free(brz->size);
	free(brz);
	free(mphf);
}
Beispiel #6
0
/*
 * Build a BMZ8 hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data is used as the bmz8_config_data_t work area.
 * c    ratio between the number of vertices and the number of keys;
 *      0 selects the default of 1.15.
 *
 * Returns a newly allocated cmph_t on success; the g table and the
 * hash-state array are handed over from the configuration to the result.
 * Returns NULL when there are 256 keys or more, when the hash-state array
 * cannot be allocated, when no graph could be generated in 100 attempts, or
 * when the searching step asked for a restart 20 times. On every NULL return
 * taken after the graph was created, the graph is destroyed and both
 * bmz8->graph and bmz8->hashes are left NULL.
 */
cmph_t *bmz8_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bmz8_data_t *bmz8f = NULL;
	cmph_uint8 i;
	cmph_uint8 iterations;
	cmph_uint8 iterations_map = 20;
	cmph_uint8 *used_edges = NULL;
	cmph_uint8 restart_mapping = 0;
	cmph_uint8 * visited = NULL;
	bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;

	if (mph->key_source->nkeys >= 256)
	{
		if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
		return NULL;
	}
	if (c == 0) c = 1.15; // validating restrictions over parameter c.
	DEBUGP("c: %f\n", c);
	bmz8->m = (cmph_uint8) mph->key_source->nkeys;
	// NOTE(review): c * nkeys is narrowed to cmph_uint8 without a range check;
	// presumably callers keep the product below 256 - confirm.
	bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
	DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
	bmz8->graph = graph_new(bmz8->n, bmz8->m);
	DEBUGP("Created graph\n");

	bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
	if (bmz8->hashes == NULL)
	{
		graph_destroy(bmz8->graph);
		bmz8->graph = NULL;
		return NULL;
	}
	for(i = 0; i < 3; ++i) bmz8->hashes[i] = NULL;

	do
	{
		// Mapping step
		cmph_uint8 biggest_g_value = 0;
		cmph_uint8 biggest_edge_value = 1;
		iterations = 100;
		// Every pass decides on its own whether a restart is needed; without
		// this reset a pass that finds no unvisited critical node would
		// inherit the restart request of the previous pass.
		restart_mapping = 0;
		if (mph->verbosity)
		{
			fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz8->m, bmz8->n);
		}
		while(1)
		{
			int ok;
			DEBUGP("hash function 1\n");
			bmz8->hashes[0] = hash_state_new(bmz8->hashfuncs[0], bmz8->n);
			DEBUGP("hash function 2\n");
			bmz8->hashes[1] = hash_state_new(bmz8->hashfuncs[1], bmz8->n);
			DEBUGP("Generating edges\n");
			ok = bmz8_gen_edges(mph);
			if (!ok)
			{
				--iterations;
				hash_state_destroy(bmz8->hashes[0]);
				bmz8->hashes[0] = NULL;
				hash_state_destroy(bmz8->hashes[1]);
				bmz8->hashes[1] = NULL;
				DEBUGP("%u iterations remaining\n", iterations);
				if (mph->verbosity)
				{
					fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
				}
				if (iterations == 0) break;
			}
			else break;
		}
		if (iterations == 0)
		{
			// Both hash states were already destroyed by the failed attempt;
			// release what is left so the configuration holds no stale pointers.
			graph_destroy(bmz8->graph);
			bmz8->graph = NULL;
			free(bmz8->hashes);
			bmz8->hashes = NULL;
			return NULL;
		}

		// Ordering step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}

		graph_obtain_critical_nodes(bmz8->graph);

		// Searching step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting Searching step.\n");
			fprintf(stderr, "\tTraversing critical vertices.\n");
		}
		DEBUGP("Searching step\n");
		// Zero-initialised bitmaps indexed by vertex (visited) and edge (used_edges)
		visited = (cmph_uint8 *)calloc((size_t)bmz8->n/8 + 1, sizeof(cmph_uint8));
		assert(visited);
		used_edges = (cmph_uint8 *)calloc((size_t)bmz8->m/8 + 1, sizeof(cmph_uint8));
		assert(used_edges);
		free(bmz8->g);
		bmz8->g = (cmph_uint8 *)calloc((size_t)bmz8->n, sizeof(cmph_uint8));
		assert(bmz8->g);
		for (i = 0; i < bmz8->n; ++i) // critical nodes
		{
			if (graph_node_is_critical(bmz8->graph, i) && (!GETBIT(visited,i)))
			{
				if(c > 1.14) restart_mapping = bmz8_traverse_critical_nodes(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				else restart_mapping = bmz8_traverse_critical_nodes_heuristic(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				if(restart_mapping) break;
			}
		}
		if(!restart_mapping)
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "\tTraversing non critical vertices.\n");
			}
			bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
		}
		else
		{
			// The next pass creates fresh hash states, so the rejected pair
			// must be destroyed here or it would be overwritten and leaked.
			hash_state_destroy(bmz8->hashes[0]);
			bmz8->hashes[0] = NULL;
			hash_state_destroy(bmz8->hashes[1]);
			bmz8->hashes[1] = NULL;
			iterations_map--;
			if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
		}

		free(used_edges);
		free(visited);

	}while(restart_mapping && iterations_map > 0);
	graph_destroy(bmz8->graph);
	bmz8->graph = NULL;
	if (iterations_map == 0)
	{
		// The hash states were destroyed in the restart branch above.
		free(bmz8->hashes);
		bmz8->hashes = NULL;
		return NULL;
	}
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	bmz8f = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));
	bmz8f->g = bmz8->g;
	bmz8->g = NULL; //transfer memory ownership
	bmz8f->hashes = bmz8->hashes;
	bmz8->hashes = NULL; //transfer memory ownership
	bmz8f->n = bmz8->n;
	bmz8f->m = bmz8->m;
	mphf->data = bmz8f;
	mphf->size = bmz8->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;
}
Beispiel #7
0
/*
 * FCH searching step: for each bucket, looks for a displacement g[bucket]
 * such that (h2(key) + g[bucket]) % m sends every key of the bucket to a
 * slot that no previously placed key occupies.
 *
 * fch            algorithm state; fch->g is replaced by a zeroed table of
 *                fch->b entries and fch->h2 is re-created on every attempt.
 * buckets        bucket set to place.
 * sorted_indexes bucket indices, in the order in which they are processed.
 *
 * Returns 0 when the last attempt placed every bucket it processed
 * (processing stops at the first empty bucket), non-zero when the loop gave
 * up because of the retry limits (10 searching attempts, or 1000 consecutive
 * h2 regenerations requested by check_for_collisions_h2).
 *
 * INDEX is a macro defined outside this block; it is used here as the
 * position of the first key inside a bucket.
 */
static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes)
{
	// random_table holds the slot numbers 0..m-1 (reordered by permut());
	// map_table is its inverse: map_table[slot] is the position of slot in
	// random_table. Positions below filled_count are the slots already taken.
	cmph_uint32 * random_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 * map_table    = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 iteration_to_generate_h2 = 0;
	cmph_uint32 searching_iterations     = 0;
	cmph_uint8 restart                   = 0;
	cmph_uint32 nbuckets                 = fch_buckets_get_nbuckets(buckets);
	cmph_uint32 i, j, z, counter = 0, filled_count = 0;
	if (fch->g) free (fch->g);
	fch->g = (cmph_uint32 *) calloc((size_t)fch->b, sizeof(cmph_uint32));

	//DEBUGP("max bucket size: %u\n", fch_buckets_get_max_size(buckets));

	for(i = 0; i < fch->m; i++)
	{
		random_table[i] = i;
	}
	permut(random_table, fch->m);
	for(i = 0; i < fch->m; i++)
	{
		map_table[random_table[i]] = i;
	}
	do {   
		// Each attempt starts with a new h2 and with no slot taken.
		if (fch->h2) hash_state_destroy(fch->h2);
		fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);  
		restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
		filled_count = 0;
		if (!restart) 
		{
			searching_iterations++; iteration_to_generate_h2 = 0;
			//DEBUGP("searching_iterations: %u\n", searching_iterations);
		}
		else {
			iteration_to_generate_h2++;
			//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
		}		
		// Place the buckets one by one; the loop stops as soon as one bucket
		// could not be placed (restart left set by the inner loops).
		for(i = 0; (i < nbuckets) && !restart; i++) {
			cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
			if (bucketsize == 0)
			{
				// An empty bucket ends the pass successfully.
				// NOTE(review): presumably sorted_indexes lists non-empty
				// buckets first - confirm against the caller.
				restart = 0; // false
				break;
			}
			else restart = 1; // true
			// Try each still-free slot as the target of the bucket's first key.
			for(z = 0; (z < (fch->m - filled_count)) && restart; z++) {
				char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
				cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
				cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;				
				counter = 0; 
				restart = 0; // false
				// Displacement that maps the first key onto the candidate slot.
				fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
				//DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
				j = INDEX;
				// Walk all keys of the bucket, starting and ending at INDEX.
				do {
					cmph_uint32 index = 0;
					key = fch_buckets_get_key(buckets, sorted_indexes[i], j);
					keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], j);
					h2 = hash(fch->h2, key, keylen) % fch->m;
					index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
					//DEBUGP("key:%s  keylen:%u  index: %u  h2:%u  bucketsize:%u\n", key, keylen, index, h2, bucketsize);
					if (map_table[index] >= filled_count) {  
						// Slot is free: swap it into position filled_count of
						// random_table, keep map_table consistent, and count it.
						cmph_uint32 y  = map_table[index];
						cmph_uint32 ry = random_table[y];
						random_table[y] = random_table[filled_count];
						random_table[filled_count] = ry;
						map_table[random_table[y]] = y;
						map_table[random_table[filled_count]] = filled_count;
						filled_count++;
						counter ++; 
					}
					else { 
						// Slot already taken: give back the slots claimed for
						// this bucket under this displacement and try the next one.
						restart = 1; // true
						filled_count = filled_count - counter;
						counter = 0; 
						break;
					}
					j = (j + 1) % bucketsize;
				} while(j % bucketsize != INDEX); 
			}
			//getchar();
		}              
		//} while(restart  && (searching_iterations < 10));
	} while(restart  && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000)); // DC
	free(map_table);
	free(random_table);
	return restart;
}
Beispiel #8
0
/*
 * Build a CHD_PH hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data is used as the chd_ph_config_data_t work area.
 * c    requested load factor; values below 0.5 are raised to 0.5 and values
 *      of 0.99 or more are lowered to 0.99.
 *
 * The number of bins n is m / (keys_per_bin * load_factor) + 1, made odd and
 * then increased in steps of 2 until check_primality() accepts it. The
 * mapping, ordering and searching steps are repeated until the searching
 * step succeeds, for at most 100 rounds.
 *
 * Returns a newly allocated cmph_t whose size is n; the compressed sequence
 * (cs) and the hash state (hl) are handed over from the configuration to the
 * result. Returns NULL when the mapping step fails or when the 100 rounds
 * are exhausted; in that case chd_ph->hl is destroyed and set to NULL.
 *
 * NOTE(review): chd_ph->occup_table is allocated here and not released by
 * this function - presumably the configuration's destructor owns it; confirm.
 */
cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_ph_data_t *chd_phf = NULL;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
	
	register double load_factor = c;
	register cmph_uint8 searching_success = 0;
	register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
	register cmph_uint32 iterations = 100;
	chd_ph_bucket_t * buckets = NULL;
	chd_ph_item_t * items = NULL;
	register cmph_uint8 failure = 0;
	cmph_uint32 max_bucket_size = 0;
	chd_ph_sorted_list_t * sorted_lists = NULL;
	cmph_uint32 * disp_table = NULL;
	register double space_lower_bound = 0;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif


	chd_ph->m = mph->key_source->nkeys;
	DEBUGP("m = %u\n", chd_ph->m);
	
	chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
	DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
	
	// Clamp the load factor to the range [0.5, 0.99]
	if(load_factor < 0.5 )
	{
		load_factor = 0.5;
	}
	
	if(load_factor >= 0.99)
	{
		load_factor = 0.99;
	}
	
	DEBUGP("load_factor = %.3f\n", load_factor);
	
	chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
	
	//Round the number of bins to the prime immediately above
	if(chd_ph->n % 2 == 0) chd_ph->n++;
	for(;;)
	{
		if(check_primality(chd_ph->n) == 1)
			break;
		chd_ph->n += 2; // just odd numbers can be primes for n > 2
		
	};
	
	DEBUGP("n = %u \n", chd_ph->n);
	// The lower bound is only computed for one key per bin; otherwise it stays 0
	if(chd_ph->keys_per_bin == 1)
	{
		space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
	}
	
	if(mph->verbosity)
	{
		fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
	}

       	// We allocate the working tables
	buckets = chd_ph_bucket_new(chd_ph->nbuckets); 
	items   = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));

	// Scale the default probe limit by log2(m) / 20
	max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
	
	// Occupancy table: ceil(n/32) cmph_uint32 words (enough for one bit per
	// bin) when each bin holds one key, otherwise one byte per bin
	if(chd_ph->keys_per_bin == 1)
		chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
	else
		chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
		
	disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
// 	
// 	init_genrand(time(0));
	
	// One round = mapping + ordering + searching; leave the loop on the first
	// successful search, or jump to cleanup with failure set.
	while(1)
	{
		iterations --;
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
		}
		
		if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure in mapping step\n");		
			}
			failure = 1;
			goto cleanup;
		}

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		// Drop the list produced by the previous round before building a new one
		if(sorted_lists)
		{
			free(sorted_lists);
		}

        	sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
		
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting searching step\n");
		}
		
		searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
		if(searching_success) break;
		
		// reset occup_table
		if(chd_ph->keys_per_bin > 1)
			memset(chd_ph->occup_table, 0, chd_ph->n);
		else
			memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
		if(iterations == 0)
		{
			// Cleanup memory
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure because the max trials was exceeded\n");
			}
			failure = 1;
			goto cleanup;
		};
	}

	#ifdef DEBUG
	{
		if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
		{
		
			DEBUGP("Error for bin packing generation");
			failure = 1;
			goto cleanup;
		}
	}
	#endif
	
	if (mph->verbosity)
	{
		fprintf(stderr, "Starting compressing step\n");
	}
	
	// Replace any compressed sequence left in the configuration with a new
	// one generated from the displacement table
	if(chd_ph->cs)
	{
		free(chd_ph->cs);
	}
	chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
	compressed_seq_init(chd_ph->cs);
	compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
	
	#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
	DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
	#endif

	// Reached on success and on failure: the working tables are always released
cleanup:
	chd_ph_bucket_destroy(buckets); 
	free(items);
	free(sorted_lists);
	free(disp_table);
	if(failure) 
	{
		if(chd_ph->hl)
		{
			hash_state_destroy(chd_ph->hl);
		}
		chd_ph->hl = NULL;
		return NULL;
	}

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
	
	chd_phf->cs = chd_ph->cs;
	chd_ph->cs = NULL; //transfer memory ownership
	chd_phf->hl = chd_ph->hl;
	chd_ph->hl = NULL; //transfer memory ownership
	chd_phf->n = chd_ph->n;
	chd_phf->nbuckets = chd_ph->nbuckets;
	
	mphf->data = chd_phf;
	mphf->size = chd_ph->n;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	
	#ifdef CMPH_TIMING	
	register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
	#endif	

	return mphf;
}
Beispiel #9
0
/*
 * CHD_PH mapping step: hashes every key and distributes the keys over the
 * buckets.
 *
 * mph              configuration; chd_ph->hl is re-created on every attempt.
 * buckets          bucket array of chd_ph->nbuckets entries, cleaned and
 *                  refilled on every attempt.
 * items            item array handed to chd_ph_bucket_insert().
 * max_bucket_size  out: largest bucket size counted during the call.
 *
 * Up to 1000 attempts are made, each with a new hash state; an attempt fails
 * when chd_ph_bucket_insert() rejects an item.
 *
 * Returns 1 on success. Returns 0 when all attempts failed or when the
 * temporary item table could not be allocated; in both cases chd_ph->hl is
 * destroyed and set to NULL.
 */
cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, cmph_uint32 *max_bucket_size)
{
	register cmph_uint32 i = 0, g = 0;
	cmph_uint32 hl[3];
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
	char * key = NULL;
	cmph_uint32 keylen = 0;
	chd_ph_map_item_t * map_item;
	chd_ph_map_item_t * map_items = malloc(chd_ph->m*sizeof(chd_ph_map_item_t));
	register cmph_uint32 mapping_iterations = 1000;
	*max_bucket_size = 0;

	// malloc(0) may legitimately return NULL, so only a non-empty request
	// that yields NULL is treated as an allocation failure.
	if (map_items == NULL && chd_ph->m != 0)
	{
		if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
		chd_ph->hl = NULL;
		return 0; // FAILURE
	}

	while(1)
	{
		mapping_iterations--;
		if (chd_ph->hl) hash_state_destroy(chd_ph->hl);
		chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m);

		chd_ph_bucket_clean(buckets, chd_ph->nbuckets);

		mph->key_source->rewind(mph->key_source->data);

		// First pass: hash each key, remember its bucket and its two probe
		// values, and count how many keys fall into each bucket.
		for(i = 0; i < chd_ph->m; i++)
		{
			mph->key_source->read(mph->key_source->data, &key, &keylen);
			hash_vector(chd_ph->hl, key, keylen, hl);

			map_item = (map_items + i);

			g = hl[0] % chd_ph->nbuckets;
			map_item->f = hl[1] % chd_ph->n;
			map_item->h = hl[2] % (chd_ph->n - 1) + 1;
			map_item->bucket_num=g;
			mph->key_source->dispose(mph->key_source->data, key, keylen);
			buckets[g].size++;
			if(buckets[g].size > *max_bucket_size)
			{
				*max_bucket_size = buckets[g].size;
			}
		}

		// Turn the counts into start offsets (running sum) and reset every
		// size to 0 before the insertion pass below.
		buckets[0].items_list = 0;
		for(i = 1; i < chd_ph->nbuckets; i++)
		{
			buckets[i].items_list = buckets[i-1].items_list + buckets[i - 1].size;
			buckets[i - 1].size = 0;
		}
		buckets[i - 1].size = 0;

		// Second pass: insert every item; a rejected item aborts this attempt.
		for(i = 0; i < chd_ph->m; i++)
		{
			if(!chd_ph_bucket_insert(buckets, map_items, items, chd_ph->nbuckets, i))
				break;
		}
		if(i == chd_ph->m)
		{
			free(map_items);
			return 1; // SUCCESS
		}

		if(mapping_iterations == 0)
		{
			goto error;
		}
	}
error:
	free(map_items);
	hash_state_destroy(chd_ph->hl);
	chd_ph->hl = NULL;
	return 0; // FAILURE
}
Beispiel #10
0
/*
 * Build a CHM hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data is used as the chm_config_data_t work area.
 * c    ratio between the number of vertices and the number of keys;
 *      0 selects the default of 2.09.
 *
 * Returns a newly allocated cmph_t on success; the g table and the
 * hash-state array are handed over from the configuration to the result.
 * Returns NULL when the hash-state array cannot be allocated or when
 * chm_gen_edges() failed 20 times; in both cases the graph is destroyed and
 * chm->graph and chm->hashes are left NULL.
 */
cmph_t *chm_new(cmph_config_t *mph, double c)
{
    cmph_t *mphf = NULL;
    chm_data_t *chmf = NULL;

    cmph_uint32 i;
    cmph_uint32 iterations = 20;
    cmph_uint8 *visited = NULL;
    chm_config_data_t *chm = (chm_config_data_t *)mph->data;
    chm->m = mph->key_source->nkeys;
    if (c == 0) c = 2.09;
    chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
    DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
    chm->graph = graph_new(chm->n, chm->m);
    DEBUGP("Created graph\n");

    chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
    if (chm->hashes == NULL)
    {
        graph_destroy(chm->graph);
        chm->graph = NULL;
        return NULL;
    }
    for(i = 0; i < 3; ++i) chm->hashes[i] = NULL;
    //Mapping step
    if (mph->verbosity)
    {
        fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", chm->m, chm->n);
    }
    while(1)
    {
        int ok;
        chm->hashes[0] = hash_state_new(chm->hashfuncs[0], chm->n);
        chm->hashes[1] = hash_state_new(chm->hashfuncs[1], chm->n);
        ok = chm_gen_edges(mph);
        if (!ok)
        {
            --iterations;
            hash_state_destroy(chm->hashes[0]);
            chm->hashes[0] = NULL;
            hash_state_destroy(chm->hashes[1]);
            chm->hashes[1] = NULL;
            DEBUGP("%u iterations remaining\n", iterations);
            if (mph->verbosity)
            {
                fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
            }
            if (iterations == 0) break;
        }
        else break;
    }
    if (iterations == 0)
    {
        // Both hash states were already destroyed by the failed attempt;
        // release the rest so the configuration keeps no stale pointers.
        graph_destroy(chm->graph);
        chm->graph = NULL;
        free(chm->hashes);
        chm->hashes = NULL;
        return NULL;
    }

    //Assignment step
    if (mph->verbosity)
    {
        fprintf(stderr, "Starting assignment step\n");
    }
    DEBUGP("Assignment step\n");
    // Zero-initialised bitmap indexed by vertex
    visited = (cmph_uint8 *)calloc((size_t)(chm->n/8 + 1), sizeof(cmph_uint8));
    assert(visited);
    free(chm->g);
    chm->g = (cmph_uint32 *)malloc(chm->n * sizeof(cmph_uint32));
    assert(chm->g);
    // Every vertex not reached yet starts a traversal with g set to 0
    for (i = 0; i < chm->n; ++i)
    {
        if (!GETBIT(visited,i))
        {
            chm->g[i] = 0;
            chm_traverse(chm, visited, i);
        }
    }
    graph_destroy(chm->graph);
    free(visited);
    chm->graph = NULL;

    mphf = (cmph_t *)malloc(sizeof(cmph_t));
    mphf->algo = mph->algo;
    chmf = (chm_data_t *)malloc(sizeof(chm_data_t));
    chmf->g = chm->g;
    chm->g = NULL; //transfer memory ownership
    chmf->hashes = chm->hashes;
    chm->hashes = NULL; //transfer memory ownership
    chmf->n = chm->n;
    chmf->m = chm->m;
    mphf->data = chmf;
    mphf->size = chm->m;
    DEBUGP("Successfully generated minimal perfect hash\n");
    if (mph->verbosity)
    {
        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
    }
    return mphf;
}
Beispiel #11
0
/*
 * Build a BDZ hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data is used as the bdz_config_data_t work area.
 * c    ratio between the number of vertices and the number of keys;
 *      0 selects the default of 1.23.
 *
 * r is ceil(c * nkeys / 3) forced to an odd value, n is 3 * r, k is 2^b and
 * the rank table has ceil(n / k) entries. The mapping step is retried with a
 * new hash state until bdz_mapping() succeeds, for at most 1000 attempts.
 *
 * Returns a newly allocated cmph_t on success; g, hl and the rank table are
 * handed over from the configuration to the result. Returns NULL when all
 * mapping attempts failed.
 */
cmph_t *bdz_new(cmph_config_t *mph, double c)
{
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
	cmph_t *mphf = NULL;
	bdz_data_t *bdzf = NULL;
	cmph_uint32 iterations;
	bdz_queue_t edges;
	bdz_graph3_t graph3;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif

	/* Derive the sizes from the key count and c. */
	if (c == 0)
	{
		c = 1.23;
	}
	DEBUGP("c: %f\n", c);
	bdz->m = mph->key_source->nkeys;
	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
	bdz->r |= 1; /* an even r becomes r + 1, an odd r is unchanged */
	bdz->n = 3*bdz->r;

	bdz->k = (1U << bdz->b);
	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);

	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);

	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
	bdz_alloc_queue(&edges,bdz->m);
	DEBUGP("Created hypergraph\n");

	DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);

	/* Mapping step: retry with a new hash state until it succeeds. */
	iterations = 1000;
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	for (;;)
	{
		int ok;
		DEBUGP("linear hash function \n");
		bdz->hl = hash_state_new(bdz->hashfunc, 15);

		ok = bdz_mapping(mph, &graph3, edges);
		if (ok)
		{
			break;
		}

		--iterations;
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		DEBUGP("%u iterations remaining\n", iterations);
		if (mph->verbosity)
		{
			fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
		}
		if (iterations == 0)
		{
			break;
		}
	}

	if (iterations == 0)
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	bdz_partial_free_graph3(&graph3);

	/* Assigning step. */
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	assigning(bdz, &graph3, edges);

	bdz_free_queue(&edges);
	bdz_free_graph3(&graph3);

	/* Ranking step. */
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	ranking(bdz);
	#ifdef CMPH_TIMING	
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	#endif

	/* Assemble the result; pointers move from the configuration to it. */
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));

	bdzf->g = bdz->g;
	bdzf->hl = bdz->hl;
	bdzf->ranktable = bdz->ranktable;
	bdz->g = NULL;
	bdz->hl = NULL;
	bdz->ranktable = NULL;

	bdzf->ranktablesize = bdz->ranktablesize;
	bdzf->k = bdz->k;
	bdzf->b = bdz->b;
	bdzf->n = bdz->n;
	bdzf->m = bdz->m;
	bdzf->r = bdz->r;

	mphf->data = bdzf;
	mphf->size = bdz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

	#ifdef CMPH_TIMING	
	register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
	register cmph_uint32 keys_per_bucket = 1;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
	#endif	

	return mphf;
}
Beispiel #12
0
/*
 * Build a BRZ hash function for the keys of mph->key_source.
 *
 * mph  configuration; mph->data is used as the brz_config_data_t work area.
 * c    algorithm parameter, adjusted according to brz->algo:
 *      CMPH_BMZ8: 0 or any value >= 2.0 becomes 1;
 *      CMPH_FCH:  any value <= 2.0 becomes 2.6.
 *      Any other brz->algo trips an assertion.
 *
 * The keys are partitioned into k = ceil(m / b) groups; brz_gen_mphf() is
 * retried with a new h0 hash state for at most 20 attempts.
 *
 * Returns a newly allocated cmph_t on success; g, h0, h1, h2, size and
 * offset are handed over from the configuration to the result. Returns NULL
 * when all 20 attempts failed; the size table is then released and
 * brz->size is reset to NULL.
 */
cmph_t *brz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	brz_data_t *brzf = NULL;
	cmph_uint32 i;
	cmph_uint32 iterations = 20;
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;

	DEBUGP("c: %f\n", c);
	switch(brz->algo) // validating restrictions over parameter c.
	{
		case CMPH_BMZ8:
			if (c == 0 || c >= 2.0) c = 1;
			break;
		case CMPH_FCH:
			if (c <= 2.0) c = 2.6;
			break;
		default:
			assert(0);
			break;
	}
	brz->c = c;
	brz->m = mph->key_source->nkeys;
	DEBUGP("m: %u\n", brz->m);
	brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
	DEBUGP("k: %u\n", brz->k);
	brz->size   = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));

	// Clustering the keys by graph id.
	if (mph->verbosity)
	{
		fprintf(stderr, "Partioning the set of keys.\n");	
	}

	while(1)
	{
		int ok;
		DEBUGP("hash function 3\n");
		brz->h0 = hash_state_new(brz->hashfuncs[2], brz->k);
		DEBUGP("Generating graphs\n");
		ok = brz_gen_mphf(mph);
		if (!ok)
		{
			--iterations;
			hash_state_destroy(brz->h0);
			brz->h0 = NULL;
			DEBUGP("%u iterations remaining to create the graphs in a external file\n", iterations);
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
			}
			if (iterations == 0) break;
		}
		else break;
	}
	if (iterations == 0)
	{
		DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
		free(brz->size);
		// Do not leave a dangling pointer behind in the configuration.
		brz->size = NULL;
		return NULL;
	}
	DEBUGP("Graphs generated\n");

	// offset[i] is the sum of size[0..i-1]; offset[0] stays 0 from calloc
	brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
	for (i = 1; i < brz->k; ++i)
	{
		brz->offset[i] = brz->size[i-1] + brz->offset[i-1];
	}
	// Generating a mphf
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	brzf = (brz_data_t *)malloc(sizeof(brz_data_t));
	brzf->g = brz->g;
	brz->g = NULL; //transfer memory ownership
	brzf->h1 = brz->h1;
	brz->h1 = NULL; //transfer memory ownership
	brzf->h2 = brz->h2;
	brz->h2 = NULL; //transfer memory ownership
	brzf->h0 = brz->h0;
	brz->h0 = NULL; //transfer memory ownership
	brzf->size = brz->size;
	brz->size = NULL; //transfer memory ownership
	brzf->offset = brz->offset;
	brz->offset = NULL; //transfer memory ownership
	brzf->k = brz->k;
	brzf->c = brz->c;
	brzf->m = brz->m;
	brzf->algo = brz->algo;
	mphf->data = brzf;
	mphf->size = brz->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;
}