Пример #1
0
cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_ph_data_t *chd_phf = NULL;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;
	
	register double load_factor = c;
	register cmph_uint8 searching_success = 0;
	register cmph_uint32 max_probes = 1 << 20; // default value for max_probes
	register cmph_uint32 iterations = 100;
	chd_ph_bucket_t * buckets = NULL;
	chd_ph_item_t * items = NULL;
	register cmph_uint8 failure = 0;
	cmph_uint32 max_bucket_size = 0;
	chd_ph_sorted_list_t * sorted_lists = NULL;
	cmph_uint32 * disp_table = NULL;
	register double space_lower_bound = 0;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif


	chd_ph->m = mph->key_source->nkeys;
	DEBUGP("m = %u\n", chd_ph->m);
	
	chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
	DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);
	
	if(load_factor < 0.5 )
	{
		load_factor = 0.5;
	}
	
	if(load_factor >= 0.99)
	{
		load_factor = 0.99;
	}
	
	DEBUGP("load_factor = %.3f\n", load_factor);
	
	chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;
	
	//Round the number of bins to the prime immediately above
	if(chd_ph->n % 2 == 0) chd_ph->n++;
	for(;;)
	{
		if(check_primality(chd_ph->n) == 1)
			break;
		chd_ph->n += 2; // just odd numbers can be primes for n > 2
		
	};
	
	DEBUGP("n = %u \n", chd_ph->n);
	if(chd_ph->keys_per_bin == 1)
	{
		space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
	}
	
	if(mph->verbosity)
	{
		fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
	}

       	// We allocate the working tables
	buckets = chd_ph_bucket_new(chd_ph->nbuckets); 
	items   = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));

	max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);
	
	if(chd_ph->keys_per_bin == 1)
		chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
	else
		chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));
		
	disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));
// 	
// 	init_genrand(time(0));
	
	while(1)
	{
		iterations --;
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
		}
		
		if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure in mapping step\n");		
			}
			failure = 1;
			goto cleanup;
		}

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		if(sorted_lists)
		{
			free(sorted_lists);
		}

        	sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);
		
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting searching step\n");
		}
		
		searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
		if(searching_success) break;
		
		// reset occup_table
		if(chd_ph->keys_per_bin > 1)
			memset(chd_ph->occup_table, 0, chd_ph->n);
		else
			memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
		if(iterations == 0)
		{
			// Cleanup memory
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure because the max trials was exceeded\n");
			}
			failure = 1;
			goto cleanup;
		};
	}

	#ifdef DEBUG
	{
		if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
		{
		
			DEBUGP("Error for bin packing generation");
			failure = 1;
			goto cleanup;
		}
	}
	#endif
	
	if (mph->verbosity)
	{
		fprintf(stderr, "Starting compressing step\n");
	}
	
	if(chd_ph->cs)
	{
		free(chd_ph->cs);
	}
	chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
	compressed_seq_init(chd_ph->cs);
	compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);
	
	#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
	DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
	#endif

cleanup:
	chd_ph_bucket_destroy(buckets); 
	free(items);
	free(sorted_lists);
	free(disp_table);
	if(failure) 
	{
		if(chd_ph->hl)
		{
			hash_state_destroy(chd_ph->hl);
		}
		chd_ph->hl = NULL;
		return NULL;
	}

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
	
	chd_phf->cs = chd_ph->cs;
	chd_ph->cs = NULL; //transfer memory ownership
	chd_phf->hl = chd_ph->hl;
	chd_ph->hl = NULL; //transfer memory ownership
	chd_phf->n = chd_ph->n;
	chd_phf->nbuckets = chd_ph->nbuckets;
	
	mphf->data = chd_phf;
	mphf->size = chd_ph->n;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	
	#ifdef CMPH_TIMING	
	register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
	#endif	

	return mphf;
}
Пример #2
0
cmph_t *chd_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_data_t *chdf = NULL;
	chd_config_data_t *chd = (chd_config_data_t *)mph->data;
	chd_ph_config_data_t * chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
	compressed_rank_t cr;
	
	register cmph_t * chd_phf = NULL;
	register cmph_uint32 packed_chd_phf_size = 0; 
	cmph_uint8 * packed_chd_phf = NULL;
	
	register cmph_uint32 packed_cr_size = 0; 
	cmph_uint8 * packed_cr = NULL;

	register cmph_uint32 i, idx, nkeys, nvals, nbins;
	cmph_uint32 * vals_table = NULL;
	register cmph_uint32 * occup_table = NULL;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif

	cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);	
	cmph_config_set_graphsize(chd->chd_ph, c);
	
	if (mph->verbosity)
	{
		fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
	}
	
	chd_phf = cmph_new(chd->chd_ph);
	
	if(chd_phf == NULL) 
	{
		return NULL;
	}
	
	packed_chd_phf_size = cmph_packed_size(chd_phf); 
	DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);
	
	/* Make sure that we have enough space to pack the mphf. */
	packed_chd_phf = calloc((size_t)packed_chd_phf_size,(size_t)1);

	/* Pack the mphf. */
	cmph_pack(chd_phf, packed_chd_phf);

	cmph_destroy(chd_phf);
	
	
	if (mph->verbosity)
	{
		fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
	}

	compressed_rank_init(&cr);
	nbins = chd_ph->n;
	nkeys = chd_ph->m;
	nvals =  nbins - nkeys; 
	
	vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
	occup_table = (cmph_uint32 *)chd_ph->occup_table;
	
	for(i = 0, idx = 0; i < nbins; i++)
	{
		if(!GETBIT32(occup_table, i))
		{
			vals_table[idx++] = i;
		}
	}
	
	compressed_rank_generate(&cr, vals_table, nvals);
	free(vals_table);
	
	packed_cr_size = compressed_rank_packed_size(&cr);
	packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
	compressed_rank_pack(&cr, packed_cr);
	compressed_rank_destroy(&cr);

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
	
	chdf->packed_cr = packed_cr;
	packed_cr = NULL; //transfer memory ownership

	chdf->packed_chd_phf = packed_chd_phf;
	packed_chd_phf = NULL; //transfer memory ownership
	
	chdf->packed_chd_phf_size = packed_chd_phf_size;
	chdf->packed_cr_size = packed_cr_size;
	
	mphf->data = chdf;
	mphf->size = nkeys;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	#ifdef CMPH_TIMING	
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	register cmph_uint32 space_usage =  chd_packed_size(mphf)*8;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
	#endif	

	return mphf;
}
Пример #3
0
cmph_t *bdz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bdz_data_t *bdzf = NULL;
	cmph_uint32 iterations;
	bdz_queue_t edges;
	bdz_graph3_t graph3;
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif


	if (c == 0) c = 1.23; // validating restrictions over parameter c.
	DEBUGP("c: %f\n", c);
	bdz->m = mph->key_source->nkeys;	
	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
	if ((bdz->r % 2) == 0) bdz->r+=1;
	bdz->n = 3*bdz->r;

	bdz->k = (1U << bdz->b);
	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);
	
	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);

	
	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
	bdz_alloc_queue(&edges,bdz->m);
	DEBUGP("Created hypergraph\n");
	
	DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);

	// Mapping step
	iterations = 1000;
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	while(1)
	{
		int ok;
		DEBUGP("linear hash function \n");
		bdz->hl = hash_state_new(bdz->hashfunc, 15);

		ok = bdz_mapping(mph, &graph3, edges);
                //ok = 0;
		if (!ok)
		{
			--iterations;
			hash_state_destroy(bdz->hl);
			bdz->hl = NULL;
			DEBUGP("%u iterations remaining\n", iterations);
			if (mph->verbosity)
			{
				fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
			}
			if (iterations == 0) break;
		} 
		else break;
	}
	
	if (iterations == 0)
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	bdz_partial_free_graph3(&graph3);
	// Assigning step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	assigning(bdz, &graph3, edges);

	bdz_free_queue(&edges);
	bdz_free_graph3(&graph3);
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	ranking(bdz);
	#ifdef CMPH_TIMING	
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	#endif
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	mphf->algo = mph->algo;
	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
	bdzf->g = bdz->g;
	bdz->g = NULL; //transfer memory ownership
	bdzf->hl = bdz->hl;
	bdz->hl = NULL; //transfer memory ownership
	bdzf->ranktable = bdz->ranktable;
	bdz->ranktable = NULL; //transfer memory ownership
	bdzf->ranktablesize = bdz->ranktablesize;
	bdzf->k = bdz->k;
	bdzf->b = bdz->b;
	bdzf->n = bdz->n;
	bdzf->m = bdz->m;
	bdzf->r = bdz->r;
	mphf->data = bdzf;
	mphf->size = bdz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}


	#ifdef CMPH_TIMING	
	register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
	register cmph_uint32 keys_per_bucket = 1;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
	#endif	

	return mphf;
}