/**
 * Build a CHD_PH perfect hash function for the keys in mph->key_source.
 *
 * The construction sizes the bucket and bin tables from the key count,
 * then repeats mapping -> ordering -> searching (at most 100 rounds)
 * until the searching step succeeds, and finally compresses the
 * displacement table into chd_ph->cs.
 *
 * @param mph  Configuration; mph->data must point to a chd_ph_config_data_t.
 *             Its m, n, nbuckets, occup_table and cs fields are overwritten.
 * @param c    Requested load factor; clamped to the range [0.5, 0.99].
 * @return     A newly allocated cmph_t whose data is a chd_ph_data_t that
 *             has taken over cs and hl from the configuration, or NULL if
 *             mapping fails, the trial budget is exhausted, or an
 *             allocation fails.
 */
cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_ph_data_t *chd_phf = NULL;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;

	double load_factor = c;
	cmph_uint8 searching_success = 0;
	cmph_uint32 max_probes = 1 << 20; // default value for max_probes
	cmph_uint32 iterations = 100;
	chd_ph_bucket_t *buckets = NULL;
	chd_ph_item_t *items = NULL;
	cmph_uint8 failure = 0;
	cmph_uint32 max_bucket_size = 0;
	chd_ph_sorted_list_t *sorted_lists = NULL;
	cmph_uint32 *disp_table = NULL;
	double space_lower_bound = 0;
	size_t occup_table_bytes = 0;
#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	// Declared up front so that no "goto cleanup" jumps over its initializer.
	double entropy = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

	chd_ph->m = mph->key_source->nkeys;
	DEBUGP("m = %u\n", chd_ph->m);

	chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
	DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);

	if (load_factor < 0.5)
	{
		load_factor = 0.5;
	}
	if (load_factor >= 0.99)
	{
		load_factor = 0.99;
	}
	DEBUGP("load_factor = %.3f\n", load_factor);

	chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;

	// Round the number of bins to the prime immediately above
	if (chd_ph->n % 2 == 0)
	{
		chd_ph->n++;
	}
	while (check_primality(chd_ph->n) != 1)
	{
		chd_ph->n += 2; // just odd numbers can be primes for n > 2
	}
	DEBUGP("n = %u \n", chd_ph->n);

	if (chd_ph->keys_per_bin == 1)
	{
		space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
	}
	if (mph->verbosity)
	{
		fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
	}

	// We allocate the working tables
	buckets = chd_ph_bucket_new(chd_ph->nbuckets);
	items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));

	max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);

	// One bit per bin when each bin holds a single key, one byte per bin
	// otherwise. The size is computed once so that the reset inside the
	// retry loop clears exactly what was allocated.
	if (chd_ph->keys_per_bin == 1)
	{
		occup_table_bytes = ((chd_ph->n + 31)/32) * sizeof(cmph_uint32);
	}
	else
	{
		occup_table_bytes = chd_ph->n * sizeof(cmph_uint8);
	}
	chd_ph->occup_table = (cmph_uint8 *) calloc(occup_table_bytes, 1);

	disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));

	// calloc(0, ...) may legitimately return NULL, so items is only
	// treated as a failed allocation when there are keys to store.
	if ((items == NULL && chd_ph->m != 0) ||
	    chd_ph->occup_table == NULL ||
	    disp_table == NULL)
	{
		if (mph->verbosity)
		{
			fprintf(stderr, "Failure allocating the working tables\n");
		}
		failure = 1;
		goto cleanup;
	}

	while (1)
	{
		iterations--;

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
		}

		if (!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure in mapping step\n");
			}
			failure = 1;
			goto cleanup;
		}

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		// Drop the list produced by a previous, unsuccessful round.
		free(sorted_lists);
		sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting searching step\n");
		}
		searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
		if (searching_success)
		{
			break;
		}

		// reset occup_table
		memset(chd_ph->occup_table, 0, occup_table_bytes);

		if (iterations == 0)
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure because the max trials was exceeded\n");
			}
			failure = 1;
			goto cleanup;
		}
	}

#ifdef DEBUG
	{
		if (!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table, sorted_lists, max_bucket_size))
		{
			DEBUGP("Error for bin packing generation");
			failure = 1;
			goto cleanup;
		}
	}
#endif

	if (mph->verbosity)
	{
		fprintf(stderr, "Starting compressing step\n");
	}

	free(chd_ph->cs);
	chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
	if (chd_ph->cs == NULL)
	{
		if (mph->verbosity)
		{
			fprintf(stderr, "Failure allocating the compressed sequence\n");
		}
		failure = 1;
		goto cleanup;
	}
	compressed_seq_init(chd_ph->cs);
	compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);

#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
	DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
#endif

cleanup:
	// The working tables are released on success and on failure alike.
	chd_ph_bucket_destroy(buckets);
	free(items);
	free(sorted_lists);
	free(disp_table);

	if (failure)
	{
		if (chd_ph->hl)
		{
			hash_state_destroy(chd_ph->hl);
		}
		chd_ph->hl = NULL;
		return NULL;
	}

	// Allocate both result objects before moving anything out of the
	// configuration, so a failed allocation leaves nothing half-transferred.
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
	if (mphf == NULL || chd_phf == NULL)
	{
		free(mphf);
		free(chd_phf);
		if (chd_ph->hl)
		{
			hash_state_destroy(chd_ph->hl);
		}
		chd_ph->hl = NULL;
		return NULL;
	}

	mphf->algo = mph->algo;
	chd_phf->cs = chd_ph->cs;
	chd_ph->cs = NULL; //transfer memory ownership
	chd_phf->hl = chd_ph->hl;
	chd_ph->hl = NULL; //transfer memory ownership
	chd_phf->n = chd_ph->n;
	chd_phf->nbuckets = chd_ph->nbuckets;

	mphf->data = chd_phf;
	mphf->size = chd_ph->n;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

#ifdef CMPH_TIMING
	{
		cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
		construction_time = construction_time - construction_time_begin;
		fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
	}
#endif

	return mphf;
}
/**
 * Build a CHD hash function on top of a CHD_PH perfect hash function.
 *
 * The inner CHD_PH function is generated from chd->chd_ph via cmph_new()
 * and stored in packed form. The indices of the bins whose bit is clear
 * in the CHD_PH occupancy table are then collected and encoded with a
 * compressed rank structure, which is stored in packed form as well.
 *
 * @param mph  Configuration; mph->data must point to a chd_config_data_t.
 * @param c    Load factor forwarded to the inner CHD_PH configuration.
 * @return     A newly allocated cmph_t whose data is a chd_data_t owning
 *             both packed buffers, or NULL if the inner construction or
 *             any allocation fails.
 */
cmph_t *chd_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_data_t *chdf = NULL;
	chd_config_data_t *chd = (chd_config_data_t *)mph->data;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
	compressed_rank_t cr;

	cmph_t *chd_phf = NULL;
	cmph_uint32 packed_chd_phf_size = 0;
	cmph_uint8 *packed_chd_phf = NULL;

	cmph_uint32 packed_cr_size = 0;
	cmph_uint8 *packed_cr = NULL;

	cmph_uint32 i, idx, nkeys, nvals, nbins;
	cmph_uint32 *vals_table = NULL;
	cmph_uint32 *occup_table = NULL;
#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

	cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
	cmph_config_set_graphsize(chd->chd_ph, c);

	if (mph->verbosity)
	{
		fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
	}

	chd_phf = cmph_new(chd->chd_ph);
	if (chd_phf == NULL)
	{
		return NULL;
	}

	packed_chd_phf_size = cmph_packed_size(chd_phf);
	DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);

	/* Make sure that we have enough space to pack the mphf. */
	packed_chd_phf = calloc((size_t)packed_chd_phf_size, (size_t)1);
	if (packed_chd_phf == NULL && packed_chd_phf_size != 0)
	{
		// Nothing to pack into: release the inner function and give up.
		cmph_destroy(chd_phf);
		return NULL;
	}

	/* Pack the mphf. */
	cmph_pack(chd_phf, packed_chd_phf);
	cmph_destroy(chd_phf);

	if (mph->verbosity)
	{
		fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
	}

	nbins = chd_ph->n;
	nkeys = chd_ph->m;
	nvals = nbins - nkeys;

	vals_table = (cmph_uint32 *)calloc(nvals, sizeof(cmph_uint32));
	if (vals_table == NULL && nvals != 0)
	{
		goto fail;
	}

	// Collect the index of every bin whose occupancy bit is clear.
	occup_table = (cmph_uint32 *)chd_ph->occup_table;
	for (i = 0, idx = 0; i < nbins; i++)
	{
		if (!GETBIT32(occup_table, i))
		{
			vals_table[idx++] = i;
		}
	}

	// cr is initialised only once its input is ready, so the allocation
	// failure above has no rank-structure state to unwind.
	compressed_rank_init(&cr);
	compressed_rank_generate(&cr, vals_table, nvals);
	free(vals_table);
	vals_table = NULL;

	packed_cr_size = compressed_rank_packed_size(&cr);
	packed_cr = (cmph_uint8 *) calloc(packed_cr_size, sizeof(cmph_uint8));
	if (packed_cr == NULL && packed_cr_size != 0)
	{
		compressed_rank_destroy(&cr);
		goto fail;
	}
	compressed_rank_pack(&cr, packed_cr);
	compressed_rank_destroy(&cr);

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	chdf = (chd_data_t *)malloc(sizeof(chd_data_t));
	if (mphf == NULL || chdf == NULL)
	{
		goto fail;
	}

	mphf->algo = mph->algo;

	chdf->packed_cr = packed_cr;
	packed_cr = NULL; //transfer memory ownership

	chdf->packed_chd_phf = packed_chd_phf;
	packed_chd_phf = NULL; //transfer memory ownership

	chdf->packed_chd_phf_size = packed_chd_phf_size;
	chdf->packed_cr_size = packed_cr_size;

	mphf->data = chdf;
	mphf->size = nkeys;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

#ifdef CMPH_TIMING
	{
		cmph_uint32 space_usage;
		ELAPSED_TIME_IN_SECONDS(&construction_time);
		space_usage = chd_packed_size(mphf)*8;
		construction_time = construction_time - construction_time_begin;
		fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
	}
#endif

	return mphf;

fail:
	// Single failure exit: free(NULL) is a no-op, so every buffer that
	// may or may not have been acquired yet can be released here.
	free(vals_table);
	free(packed_cr);
	free(packed_chd_phf);
	free(mphf);
	free(chdf);
	return NULL;
}
/**
 * Build a BDZ minimal perfect hash function for the keys in mph->key_source.
 *
 * Sizes a 3-partite hypergraph from the key count and c, retries the
 * mapping step with a fresh hash state (at most 1000 times) until
 * bdz_mapping() succeeds, then runs the assigning and ranking steps and
 * moves the resulting tables out of the configuration into the returned
 * object.
 *
 * @param mph  Configuration; mph->data must point to a bdz_config_data_t.
 *             Its m, r, n, k, ranktablesize and hl fields are overwritten.
 * @param c    Ratio used to size the graph (r = ceil(c * nkeys / 3));
 *             a value of 0 selects 1.23.
 * @return     A newly allocated cmph_t whose data is a bdz_data_t that has
 *             taken over g, hl and ranktable from the configuration, or
 *             NULL if every mapping attempt fails or the result objects
 *             cannot be allocated.
 */
cmph_t *bdz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bdz_data_t *bdzf = NULL;
	cmph_uint32 iterations;
	bdz_queue_t edges;
	bdz_graph3_t graph3;
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

	if (c == 0)
	{
		c = 1.23; // validating restrictions over parameter c.
	}
	DEBUGP("c: %f\n", c);

	bdz->m = mph->key_source->nkeys;
	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
	if ((bdz->r % 2) == 0)
	{
		bdz->r += 1; // r is kept odd
	}
	bdz->n = 3*bdz->r;

	bdz->k = (1U << bdz->b);
	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);

	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);

	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
	bdz_alloc_queue(&edges, bdz->m);
	DEBUGP("Created hypergraph\n");

	DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);

	// Mapping step
	iterations = 1000;
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	while (1)
	{
		int ok;
		DEBUGP("linear hash function \n");
		bdz->hl = hash_state_new(bdz->hashfunc, 15);

		ok = bdz_mapping(mph, &graph3, edges);
		if (ok)
		{
			break;
		}

		// This hash state did not work out: discard it and count the attempt.
		--iterations;
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		DEBUGP("%u iterations remaining\n", iterations);
		if (mph->verbosity)
		{
			fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
		}
		if (iterations == 0)
		{
			break;
		}
	}

	if (iterations == 0)
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	bdz_partial_free_graph3(&graph3);

	// Assigning step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	assigning(bdz, &graph3, edges);

	bdz_free_queue(&edges);
	bdz_free_graph3(&graph3);

	if (mph->verbosity)
	{
		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	ranking(bdz);

#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
#endif

	// Allocate both result objects before moving anything out of the
	// configuration, so a failed allocation leaves nothing half-transferred.
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
	if (mphf == NULL || bdzf == NULL)
	{
		free(mphf);
		free(bdzf);
		// Same hash-state handling as a failed mapping attempt.
		// NOTE(review): bdz->g and bdz->ranktable stay with the
		// configuration here; confirm that its destructor releases them.
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		return NULL;
	}

	mphf->algo = mph->algo;

	bdzf->g = bdz->g;
	bdz->g = NULL; //transfer memory ownership
	bdzf->hl = bdz->hl;
	bdz->hl = NULL; //transfer memory ownership
	bdzf->ranktable = bdz->ranktable;
	bdz->ranktable = NULL; //transfer memory ownership
	bdzf->ranktablesize = bdz->ranktablesize;
	bdzf->k = bdz->k;
	bdzf->b = bdz->b;
	bdzf->n = bdz->n;
	bdzf->m = bdz->m;
	bdzf->r = bdz->r;

	mphf->data = bdzf;
	mphf->size = bdz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

#ifdef CMPH_TIMING
	{
		cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
		cmph_uint32 keys_per_bucket = 1;
		construction_time = construction_time - construction_time_begin;
		fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
	}
#endif

	return mphf;
}