/*
 * Release an FCH hash function object.
 *
 * mphf: object whose data field points to an fch_data_t. The g array, both
 *       hash states, the algorithm data and mphf itself are all released.
 */
void fch_destroy(cmph_t *mphf)
{
	fch_data_t *fchf = (fch_data_t *)mphf->data;

	/* Members first ... */
	free(fchf->g);
	hash_state_destroy(fchf->h1);
	hash_state_destroy(fchf->h2);

	/* ... then the algorithm data, and last the wrapper that pointed to it. */
	free(fchf);
	free(mphf);
}
/*
 * Release a BMZ8 hash function object.
 *
 * mphf: object whose data field points to a bmz8_data_t. The g array, the
 *       first two hash states, the hashes array, the algorithm data and mphf
 *       itself are all released.
 */
void bmz8_destroy(cmph_t *mphf)
{
	bmz8_data_t *bmz8f = (bmz8_data_t *)mphf->data;
	int slot;

	free(bmz8f->g);

	/* Only slots 0 and 1 are destroyed here. */
	for (slot = 0; slot < 2; ++slot)
	{
		hash_state_destroy(bmz8f->hashes[slot]);
	}
	free(bmz8f->hashes);

	free(bmz8f);
	free(mphf);
}
/*
 * FCH mapping step: distribute every key of the key source into buckets.
 *
 * A fresh h1 hash state replaces any previous one, the parameters b, p1 and
 * p2 are recomputed from c and m, and each key read from the key source is
 * inserted into the bucket selected by mixh10h11h12().
 *
 * Returns the bucket set created by fch_buckets_new().
 */
static fch_buckets_t * mapping(cmph_config_t *mph)
{
	fch_config_data_t *fch = (fch_config_data_t *)mph->data;
	fch_buckets_t *buckets = NULL;
	cmph_uint32 remaining;

	if (fch->h1) hash_state_destroy(fch->h1);
	fch->h1 = hash_state_new(fch->hashfuncs[0], fch->m);

	fch->b = fch_calc_b(fch->c, fch->m);
	fch->p1 = fch_calc_p1(fch->m);
	fch->p2 = fch_calc_p2(fch->b);
	buckets = fch_buckets_new(fch->b);

	mph->key_source->rewind(mph->key_source->data);
	for (remaining = fch->m; remaining > 0; remaining--)
	{
		char *key = NULL;
		cmph_uint32 keylen;
		cmph_uint32 bucket_id;

		mph->key_source->read(mph->key_source->data, &key, &keylen);
		bucket_id = hash(fch->h1, key, keylen) % fch->m;
		bucket_id = mixh10h11h12 (fch->b, fch->p1, fch->p2, bucket_id);

		/* The key buffer is handed over to the bucket set: it is not disposed here. */
		fch_buckets_insert(buckets, bucket_id, key, keylen);
	}
	return buckets;
}
/*
 * Release a CHD_PH hash function object.
 *
 * mphf: object whose data field points to a chd_ph_data_t. The hash state,
 *       the compressed sequence (its contents and the struct itself), the
 *       algorithm data and mphf are all released.
 */
void chd_ph_destroy(cmph_t *mphf)
{
	chd_ph_data_t *chd_phf = (chd_ph_data_t *)mphf->data;

	hash_state_destroy(chd_phf->hl);

	/* compressed_seq_destroy() is followed by free() of the struct itself. */
	compressed_seq_destroy(chd_phf->cs);
	free(chd_phf->cs);

	free(chd_phf);
	free(mphf);
}
/*
 * Release a BRZ hash function object.
 *
 * mphf: object whose data field points to a brz_data_t. When the g table is
 *       present, the k per-bucket g arrays and h1/h2 hash states are released
 *       together with the three arrays holding them. h0, size, offset, the
 *       algorithm data and mphf are always released.
 */
void brz_destroy(cmph_t *mphf)
{
	brz_data_t *brzf = (brz_data_t *)mphf->data;

	if (brzf->g != NULL)
	{
		cmph_uint32 bucket = 0;

		while (bucket < brzf->k)
		{
			free(brzf->g[bucket]);
			hash_state_destroy(brzf->h1[bucket]);
			hash_state_destroy(brzf->h2[bucket]);
			bucket++;
		}
		free(brzf->g);
		free(brzf->h1);
		free(brzf->h2);
	}

	hash_state_destroy(brzf->h0);
	free(brzf->size);
	free(brzf->offset);
	free(brzf);
	free(mphf);
}
/*
 * Build a BMZ8 hash function for the keys exposed by mph->key_source.
 *
 * mph: generic configuration; mph->data must point to a bmz8_config_data_t.
 * c:   ratio between the number of graph vertices and the number of keys;
 *      0 selects the default of 1.15.
 *
 * Returns a newly allocated cmph_t that takes ownership of the g table and of
 * the hash states, or NULL when the key set has 256 keys or more, when no
 * usable graph is found within the retry budget, or when an allocation
 * fails. On every NULL return the graph, the hash states and the hashes array
 * are released and the matching configuration pointers are reset to NULL.
 *
 * NOTE(review): n is stored in a cmph_uint8, so ceil(c * nkeys) above 255
 * does not fit; confirm how callers are expected to bound c.
 */
cmph_t *bmz8_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	bmz8_data_t *bmz8f = NULL;
	cmph_uint8 i;
	cmph_uint8 iterations;
	cmph_uint8 iterations_map = 20;
	cmph_uint8 *used_edges = NULL;
	cmph_uint8 restart_mapping = 0;
	cmph_uint8 *visited = NULL;
	bmz8_config_data_t *bmz8 = (bmz8_config_data_t *)mph->data;

	if (mph->key_source->nkeys >= 256)
	{
		if (mph->verbosity) fprintf(stderr, "The number of keys in BMZ8 must be lower than 256.\n");
		return NULL;
	}
	if (c == 0) c = 1.15; // validating restrictions over parameter c.
	DEBUGP("c: %f\n", c);
	bmz8->m = (cmph_uint8) mph->key_source->nkeys;
	bmz8->n = (cmph_uint8) ceil(c * mph->key_source->nkeys);
	DEBUGP("m (edges): %u n (vertices): %u c: %f\n", bmz8->m, bmz8->n, c);
	bmz8->graph = graph_new(bmz8->n, bmz8->m);
	DEBUGP("Created graph\n");

	bmz8->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
	if (bmz8->hashes == NULL) goto fail;
	for(i = 0; i < 3; ++i) bmz8->hashes[i] = NULL;

	do
	{
		// Mapping step
		cmph_uint8 biggest_g_value = 0;
		cmph_uint8 biggest_edge_value = 1;
		iterations = 100;
		// A stale flag from the previous attempt must not force another restart.
		restart_mapping = 0;
		if (mph->verbosity)
		{
			fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bmz8->m, bmz8->n);
		}
		// Hash states that survived a failed searching step would otherwise be
		// overwritten (and leaked) below.
		for(i = 0; i < 2; ++i)
		{
			if (bmz8->hashes[i])
			{
				hash_state_destroy(bmz8->hashes[i]);
				bmz8->hashes[i] = NULL;
			}
		}
		while(1)
		{
			int ok;
			DEBUGP("hash function 1\n");
			bmz8->hashes[0] = hash_state_new(bmz8->hashfuncs[0], bmz8->n);
			DEBUGP("hash function 2\n");
			bmz8->hashes[1] = hash_state_new(bmz8->hashfuncs[1], bmz8->n);
			DEBUGP("Generating edges\n");
			ok = bmz8_gen_edges(mph);
			if (!ok)
			{
				--iterations;
				hash_state_destroy(bmz8->hashes[0]);
				bmz8->hashes[0] = NULL;
				hash_state_destroy(bmz8->hashes[1]);
				bmz8->hashes[1] = NULL;
				DEBUGP("%u iterations remaining\n", iterations);
				if (mph->verbosity)
				{
					fprintf(stderr, "simple graph creation failure - %u iterations remaining\n", iterations);
				}
				if (iterations == 0) break;
			}
			else break;
		}
		if (iterations == 0) goto fail;

		// Ordering step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		graph_obtain_critical_nodes(bmz8->graph);

		// Searching step
		if (mph->verbosity)
		{
			fprintf(stderr, "Starting Searching step.\n");
			fprintf(stderr, "\tTraversing critical vertices.\n");
		}
		DEBUGP("Searching step\n");
		// One bit per vertex / per edge, zero-initialised.
		visited = (cmph_uint8 *)calloc((size_t)bmz8->n/8 + 1, 1);
		used_edges = (cmph_uint8 *)calloc((size_t)bmz8->m/8 + 1, 1);
		free(bmz8->g);
		bmz8->g = (cmph_uint8 *)calloc((size_t)bmz8->n, sizeof(cmph_uint8));
		if (visited == NULL || used_edges == NULL || bmz8->g == NULL) goto fail;

		for (i = 0; i < bmz8->n; ++i) // critical nodes
		{
			if (graph_node_is_critical(bmz8->graph, i) && (!GETBIT(visited,i)))
			{
				if(c > 1.14) restart_mapping = bmz8_traverse_critical_nodes(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				else restart_mapping = bmz8_traverse_critical_nodes_heuristic(bmz8, i, &biggest_g_value, &biggest_edge_value, used_edges, visited);
				if(restart_mapping) break;
			}
		}
		if(!restart_mapping)
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "\tTraversing non critical vertices.\n");
			}
			bmz8_traverse_non_critical_nodes(bmz8, used_edges, visited); // non_critical_nodes
		}
		else
		{
			iterations_map--;
			if (mph->verbosity) fprintf(stderr, "Restarting mapping step. %u iterations remaining.\n", iterations_map);
		}

		free(used_edges);
		used_edges = NULL;
		free(visited);
		visited = NULL;

	}while(restart_mapping && iterations_map > 0);

	graph_destroy(bmz8->graph);
	bmz8->graph = NULL;
	if (iterations_map == 0) goto fail;

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	bmz8f = (bmz8_data_t *)malloc(sizeof(bmz8_data_t));
	if (mphf == NULL || bmz8f == NULL) goto fail;

	mphf->algo = mph->algo;
	bmz8f->g = bmz8->g;
	bmz8->g = NULL; //transfer memory ownership
	bmz8f->hashes = bmz8->hashes;
	bmz8->hashes = NULL; //transfer memory ownership
	bmz8f->n = bmz8->n;
	bmz8f->m = bmz8->m;
	mphf->data = bmz8f;
	mphf->size = bmz8->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;

fail:
	// Single exit for every failure: release whatever this call still holds
	// and leave no dangling pointer behind in the configuration.
	free(mphf);
	free(bmz8f);
	free(used_edges);
	free(visited);
	if (bmz8->graph)
	{
		graph_destroy(bmz8->graph);
		bmz8->graph = NULL;
	}
	if (bmz8->hashes)
	{
		for(i = 0; i < 3; ++i)
		{
			if (bmz8->hashes[i]) hash_state_destroy(bmz8->hashes[i]);
		}
		free(bmz8->hashes);
		bmz8->hashes = NULL;
	}
	return NULL;
}
/*
 * FCH searching step: choose a g value for every non-empty bucket so that all
 * keys, hashed with h2 and shifted by their bucket's g, land on distinct
 * positions in [0, m).
 *
 * fch:            configuration; fch->g is freed and reallocated with b
 *                 entries, fch->h2 is replaced on every attempt.
 * buckets:        bucket set holding the keys.
 * sorted_indexes: bucket indexes in the order in which they are processed.
 *
 * Returns the final value of the restart flag: 0 when the last attempt placed
 * every processed bucket, non-zero when the attempt limits were reached while
 * a restart was still pending.
 *
 * NOTE(review): INDEX is defined outside this block; it is used as the index
 * of the key from which the candidate g value of a bucket is derived.
 */
static cmph_uint8 searching(fch_config_data_t *fch, fch_buckets_t *buckets, cmph_uint32 *sorted_indexes)
{
	/* random_table is a permutation of [0, m); map_table is its inverse. */
	cmph_uint32 * random_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 * map_table = (cmph_uint32 *) calloc((size_t)fch->m, sizeof(cmph_uint32));
	cmph_uint32 iteration_to_generate_h2 = 0;
	cmph_uint32 searching_iterations = 0;
	cmph_uint8 restart = 0;
	cmph_uint32 nbuckets = fch_buckets_get_nbuckets(buckets);
	cmph_uint32 i, j, z, counter = 0, filled_count = 0;
	if (fch->g) free (fch->g);
	fch->g = (cmph_uint32 *) calloc((size_t)fch->b, sizeof(cmph_uint32));

	//DEBUGP("max bucket size: %u\n", fch_buckets_get_max_size(buckets));

	for(i = 0; i < fch->m; i++)
	{
		random_table[i] = i;
	}
	permut(random_table, fch->m);
	for(i = 0; i < fch->m; i++)
	{
		map_table[random_table[i]] = i;
	}
	do {
		/* Every attempt starts with a fresh h2 and an empty filled prefix. */
		if (fch->h2) hash_state_destroy(fch->h2);
		fch->h2 = hash_state_new(fch->hashfuncs[1], fch->m);
		restart = check_for_collisions_h2(fch, buckets, sorted_indexes);
		filled_count = 0;
		if (!restart)
		{
			/* h2 accepted: this counts as one searching attempt. */
			searching_iterations++;
			iteration_to_generate_h2 = 0;
			//DEBUGP("searching_iterations: %u\n", searching_iterations);
		}
		else
		{
			/* h2 rejected by the collision check: the bucket loop below is skipped. */
			iteration_to_generate_h2++;
			//DEBUGP("iteration_to_generate_h2: %u\n", iteration_to_generate_h2);
		}
		for(i = 0; (i < nbuckets) && !restart; i++)
		{
			cmph_uint32 bucketsize = fch_buckets_get_size(buckets, sorted_indexes[i]);
			if (bucketsize == 0)
			{
				/* The first empty bucket ends the attempt successfully. */
				restart = 0; // false
				break;
			}
			else restart = 1; // true
			/* Try the not-yet-filled entries of random_table as target position of key INDEX. */
			for(z = 0; (z < (fch->m - filled_count)) && restart; z++)
			{
				char * key = fch_buckets_get_key(buckets, sorted_indexes[i], INDEX);
				cmph_uint32 keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], INDEX);
				cmph_uint32 h2 = hash(fch->h2, key, keylen) % fch->m;
				counter = 0;
				restart = 0; // false
				/* g is chosen so that (h2 + g) % m equals random_table[filled_count + z]. */
				fch->g[sorted_indexes[i]] = (fch->m + random_table[filled_count + z] - h2) % fch->m;
				//DEBUGP("g[%u]: %u\n", sorted_indexes[i], fch->g[sorted_indexes[i]]);
				j = INDEX;
				do
				{
					cmph_uint32 index = 0;
					key = fch_buckets_get_key(buckets, sorted_indexes[i], j);
					keylen = fch_buckets_get_keylength(buckets, sorted_indexes[i], j);
					h2 = hash(fch->h2, key, keylen) % fch->m;
					index = (h2 + fch->g[sorted_indexes[i]]) % fch->m;
					//DEBUGP("key:%s keylen:%u index: %u h2:%u bucketsize:%u\n", key, keylen, index, h2, bucketsize);
					if (map_table[index] >= filled_count)
					{
						/*
						 * Position `index` lies outside the filled prefix: swap it
						 * into slot filled_count of random_table, keep map_table
						 * consistent, and grow the prefix by one.
						 */
						cmph_uint32 y = map_table[index];
						cmph_uint32 ry = random_table[y];
						random_table[y] = random_table[filled_count];
						random_table[filled_count] = ry;
						map_table[random_table[y]] = y;
						map_table[random_table[filled_count]] = filled_count;
						filled_count++;
						counter ++;
					}
					else
					{
						/*
						 * Position already inside the filled prefix: shrink the
						 * prefix by the number of keys placed for this candidate
						 * (the swaps themselves are not undone) and try the next z.
						 */
						restart = 1; // true
						filled_count = filled_count - counter;
						counter = 0;
						break;
					}
					j = (j + 1) % bucketsize;
				} while(j % bucketsize != INDEX);
			}
		}
	/* Retry while a restart is pending and both attempt limits still allow it. */
	} while(restart && (searching_iterations < 10) && (iteration_to_generate_h2 < 1000));
	free(map_table);
	free(random_table);
	return restart;
}
/*
 * Build a CHD_PH hash function for the keys exposed by mph->key_source.
 *
 * mph: generic configuration; mph->data must point to a chd_ph_config_data_t.
 * c:   requested load factor; it is clamped to the range [0.5, 0.99].
 *
 * The number of bins n is rounded up to a prime. The mapping, ordering and
 * searching steps are repeated up to 100 times, and the resulting
 * displacement table is stored as a compressed sequence.
 *
 * Returns a newly allocated cmph_t that takes ownership of the compressed
 * sequence and of the hash state, or NULL when the mapping step fails, the
 * retry budget is exhausted or an allocation fails.
 */
cmph_t *chd_ph_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chd_ph_data_t *chd_phf = NULL;
	chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data;

	double load_factor = c;
	cmph_uint8 searching_success = 0;
	cmph_uint32 max_probes = 1 << 20; // default value for max_probes
	cmph_uint32 iterations = 100;
	chd_ph_bucket_t * buckets = NULL;
	chd_ph_item_t * items = NULL;
	cmph_uint8 failure = 0;
	cmph_uint32 max_bucket_size = 0;
	chd_ph_sorted_list_t * sorted_lists = NULL;
	cmph_uint32 * disp_table = NULL;
	double space_lower_bound = 0;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	// Declared up front so that no goto jumps past its initialisation.
	double entropy = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif

	chd_ph->m = mph->key_source->nkeys;
	DEBUGP("m = %u\n", chd_ph->m);

	chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1;
	DEBUGP("nbuckets = %u\n", chd_ph->nbuckets);

	if(load_factor < 0.5 )
	{
		load_factor = 0.5;
	}

	if(load_factor >= 0.99)
	{
		load_factor = 0.99;
	}

	DEBUGP("load_factor = %.3f\n", load_factor);

	chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1;

	//Round the number of bins to the prime immediately above
	if(chd_ph->n % 2 == 0) chd_ph->n++;
	for(;;)
	{
		if(check_primality(chd_ph->n) == 1)
			break;
		chd_ph->n += 2; // just odd numbers can be primes for n > 2
	};

	DEBUGP("n = %u \n", chd_ph->n);
	if(chd_ph->keys_per_bin == 1)
	{
		space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n);
	}

	if(mph->verbosity)
	{
		fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound);
	}

	// We allocate the working tables
	buckets = chd_ph_bucket_new(chd_ph->nbuckets);
	items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t));

	max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes);

	// One bit per bin when each bin holds a single key, one byte per bin otherwise.
	if(chd_ph->keys_per_bin == 1)
		chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32));
	else
		chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8));

	disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32));

	// A zero-sized items table may legitimately be NULL; every other table has
	// at least one element, so NULL there means the allocation failed.
	if(buckets == NULL || (items == NULL && chd_ph->m > 0) ||
	   chd_ph->occup_table == NULL || disp_table == NULL)
	{
		failure = 1;
		goto cleanup;
	}

	while(1)
	{
		iterations --;

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n);
		}

		if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size))
		{
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure in mapping step\n");
			}
			failure = 1;
			goto cleanup;
		}

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting ordering step\n");
		}
		if(sorted_lists)
		{
			free(sorted_lists);
		}

		sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size);

		if (mph->verbosity)
		{
			fprintf(stderr, "Starting searching step\n");
		}

		searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table);
		if(searching_success) break;

		// reset occup_table
		if(chd_ph->keys_per_bin > 1)
			memset(chd_ph->occup_table, 0, chd_ph->n);
		else
			memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32));
		if(iterations == 0)
		{
			// Cleanup memory
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure because the max trials was exceeded\n");
			}
			failure = 1;
			goto cleanup;
		};
	}

	#ifdef DEBUG
	{
		if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size))
		{
			DEBUGP("Error for bin packing generation");
			failure = 1;
			goto cleanup;
		}
	}
	#endif

	if (mph->verbosity)
	{
		fprintf(stderr, "Starting compressing step\n");
	}

	if(chd_ph->cs)
	{
		free(chd_ph->cs);
	}
	chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t));
	if(chd_ph->cs == NULL)
	{
		failure = 1;
		goto cleanup;
	}
	compressed_seq_init(chd_ph->cs);
	compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets);

	#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes);
	DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m);
	#endif

cleanup:
	// The working tables are released on success and on failure alike.
	if(buckets)
	{
		chd_ph_bucket_destroy(buckets);
	}
	free(items);
	free(sorted_lists);
	free(disp_table);
	if(failure)
	{
		if(chd_ph->hl)
		{
			hash_state_destroy(chd_ph->hl);
		}
		chd_ph->hl = NULL;
		return NULL;
	}

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t));
	if(mphf == NULL || chd_phf == NULL)
	{
		// Nothing has been transferred yet: cs and hl stay with the configuration.
		free(mphf);
		free(chd_phf);
		return NULL;
	}

	mphf->algo = mph->algo;
	chd_phf->cs = chd_ph->cs;
	chd_ph->cs = NULL; //transfer memory ownership
	chd_phf->hl = chd_ph->hl;
	chd_ph->hl = NULL; //transfer memory ownership
	chd_phf->n = chd_ph->n;
	chd_phf->nbuckets = chd_ph->nbuckets;

	mphf->data = chd_phf;
	mphf->size = chd_ph->n;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

	#ifdef CMPH_TIMING
	cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m);
	#endif

	return mphf;
}
cmph_uint8 chd_ph_mapping(cmph_config_t *mph, chd_ph_bucket_t * buckets, chd_ph_item_t * items, cmph_uint32 *max_bucket_size) { register cmph_uint32 i = 0, g = 0; cmph_uint32 hl[3]; chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data; char * key = NULL; cmph_uint32 keylen = 0; chd_ph_map_item_t * map_item; chd_ph_map_item_t * map_items = malloc(chd_ph->m*sizeof(chd_ph_map_item_t)); register cmph_uint32 mapping_iterations = 1000; *max_bucket_size = 0; while(1) { mapping_iterations--; if (chd_ph->hl) hash_state_destroy(chd_ph->hl); chd_ph->hl = hash_state_new(chd_ph->hashfunc, chd_ph->m); chd_ph_bucket_clean(buckets, chd_ph->nbuckets); mph->key_source->rewind(mph->key_source->data); for(i = 0; i < chd_ph->m; i++) { mph->key_source->read(mph->key_source->data, &key, &keylen); hash_vector(chd_ph->hl, key, keylen, hl); map_item = (map_items + i); g = hl[0] % chd_ph->nbuckets; map_item->f = hl[1] % chd_ph->n; map_item->h = hl[2] % (chd_ph->n - 1) + 1; map_item->bucket_num=g; mph->key_source->dispose(mph->key_source->data, key, keylen); // if(buckets[g].size == (chd_ph->keys_per_bucket << 2)) // { // DEBUGP("BUCKET = %u -- SIZE = %u -- MAXIMUM SIZE = %u\n", g, buckets[g].size, (chd_ph->keys_per_bucket << 2)); // goto error; // } buckets[g].size++; if(buckets[g].size > *max_bucket_size) { *max_bucket_size = buckets[g].size; } } buckets[0].items_list = 0; for(i = 1; i < chd_ph->nbuckets; i++) { buckets[i].items_list = buckets[i-1].items_list + buckets[i - 1].size; buckets[i - 1].size = 0; }; buckets[i - 1].size = 0; for(i = 0; i < chd_ph->m; i++) { map_item = (map_items + i); if(!chd_ph_bucket_insert(buckets, map_items, items, chd_ph->nbuckets, i)) break; } if(i == chd_ph->m) { free(map_items); return 1; // SUCCESS } if(mapping_iterations == 0) { goto error; } } error: free(map_items); hash_state_destroy(chd_ph->hl); chd_ph->hl = NULL; return 0; // FAILURE }
/*
 * Build a CHM hash function for the keys exposed by mph->key_source.
 *
 * mph: generic configuration; mph->data must point to a chm_config_data_t.
 * c:   ratio between the number of graph vertices and the number of keys;
 *      0 selects the default of 2.09.
 *
 * Returns a newly allocated cmph_t that takes ownership of the g table and of
 * the hash states, or NULL when no usable graph is generated within 20
 * attempts or an allocation fails. On every NULL return the graph, the hash
 * states and the hashes array are released and the matching configuration
 * pointers are reset to NULL.
 */
cmph_t *chm_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	chm_data_t *chmf = NULL;

	cmph_uint32 i;
	cmph_uint32 iterations = 20;
	cmph_uint8 *visited = NULL;
	chm_config_data_t *chm = (chm_config_data_t *)mph->data;
	chm->m = mph->key_source->nkeys;
	if (c == 0) c = 2.09;
	chm->n = (cmph_uint32)ceil(c * mph->key_source->nkeys);
	DEBUGP("m (edges): %u n (vertices): %u c: %f\n", chm->m, chm->n, c);
	chm->graph = graph_new(chm->n, chm->m);
	DEBUGP("Created graph\n");

	chm->hashes = (hash_state_t **)malloc(sizeof(hash_state_t *)*3);
	if (chm->hashes == NULL) goto fail;
	for(i = 0; i < 3; ++i) chm->hashes[i] = NULL;

	//Mapping step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", chm->m, chm->n);
	}
	while(1)
	{
		int ok;
		chm->hashes[0] = hash_state_new(chm->hashfuncs[0], chm->n);
		chm->hashes[1] = hash_state_new(chm->hashfuncs[1], chm->n);
		ok = chm_gen_edges(mph);
		if (!ok)
		{
			--iterations;
			hash_state_destroy(chm->hashes[0]);
			chm->hashes[0] = NULL;
			hash_state_destroy(chm->hashes[1]);
			chm->hashes[1] = NULL;
			DEBUGP("%u iterations remaining\n", iterations);
			if (mph->verbosity)
			{
				fprintf(stderr, "Acyclic graph creation failure - %u iterations remaining\n", iterations);
			}
			if (iterations == 0) break;
		}
		else break;
	}
	if (iterations == 0) goto fail;

	//Assignment step
	if (mph->verbosity)
	{
		fprintf(stderr, "Starting assignment step\n");
	}
	DEBUGP("Assignment step\n");
	// One bit per vertex, zero-initialised.
	visited = (cmph_uint8 *)calloc((size_t)(chm->n/8 + 1), 1);
	free(chm->g);
	chm->g = (cmph_uint32 *)malloc(chm->n * sizeof(cmph_uint32));
	if (visited == NULL || chm->g == NULL) goto fail;

	// Each unvisited vertex starts a traversal with g fixed to 0.
	for (i = 0; i < chm->n; ++i)
	{
		if (!GETBIT(visited,i))
		{
			chm->g[i] = 0;
			chm_traverse(chm, visited, i);
		}
	}
	graph_destroy(chm->graph);
	chm->graph = NULL;
	free(visited);
	visited = NULL;

	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	chmf = (chm_data_t *)malloc(sizeof(chm_data_t));
	if (mphf == NULL || chmf == NULL) goto fail;

	mphf->algo = mph->algo;
	chmf->g = chm->g;
	chm->g = NULL; //transfer memory ownership
	chmf->hashes = chm->hashes;
	chm->hashes = NULL; //transfer memory ownership
	chmf->n = chm->n;
	chmf->m = chm->m;
	mphf->data = chmf;
	mphf->size = chm->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;

fail:
	// Single exit for every failure: release whatever this call still holds
	// and leave no dangling pointer behind in the configuration.
	free(mphf);
	free(chmf);
	free(visited);
	if (chm->graph)
	{
		graph_destroy(chm->graph);
		chm->graph = NULL;
	}
	if (chm->hashes)
	{
		for(i = 0; i < 3; ++i)
		{
			if (chm->hashes[i]) hash_state_destroy(chm->hashes[i]);
		}
		free(chm->hashes);
		chm->hashes = NULL;
	}
	return NULL;
}
/*
 * Build a BDZ hash function for the keys exposed by mph->key_source.
 *
 * mph: generic configuration; mph->data must point to a bdz_config_data_t.
 * c:   ratio used to size the 3-partite hypergraph; 0 selects 1.23.
 *
 * The mapping step is retried with a new hash state up to 1000 times; it is
 * followed by the assigning and ranking steps.
 *
 * Returns a newly allocated cmph_t that takes ownership of g, the hash state
 * and the rank table, or NULL when every mapping attempt fails.
 */
cmph_t *bdz_new(cmph_config_t *mph, double c)
{
	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
	cmph_t *result = NULL;
	bdz_data_t *packed = NULL;
	cmph_uint32 attempts_left = 1000;
	bdz_queue_t edges;
	bdz_graph3_t graph3;
	#ifdef CMPH_TIMING
	double construction_time_begin = 0.0;
	double construction_time = 0.0;
	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
	#endif

	// validating restrictions over parameter c.
	if (c == 0)
	{
		c = 1.23;
	}
	DEBUGP("c: %f\n", c);

	bdz->m = mph->key_source->nkeys;
	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
	// r is kept odd
	if ((bdz->r % 2) == 0)
	{
		bdz->r += 1;
	}
	bdz->n = 3*bdz->r;

	bdz->k = (1U << bdz->b);
	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);

	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);

	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
	bdz_alloc_queue(&edges,bdz->m);
	DEBUGP("Created hypergraph\n");
	DEBUGP("m (edges): %u n (vertices): %u r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);

	// Mapping step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	for (;;)
	{
		DEBUGP("linear hash function \n");
		bdz->hl = hash_state_new(bdz->hashfunc, 15);
		if (bdz_mapping(mph, &graph3, edges))
		{
			break;
		}

		// This hash state did not work out: drop it and account for the attempt.
		--attempts_left;
		hash_state_destroy(bdz->hl);
		bdz->hl = NULL;
		DEBUGP("%u iterations remaining\n", attempts_left);
		if (mph->verbosity)
		{
			fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", attempts_left);
		}
		if (attempts_left == 0)
		{
			break;
		}
	}

	if (attempts_left == 0)
	{
		bdz_free_queue(&edges);
		bdz_free_graph3(&graph3);
		return NULL;
	}
	bdz_partial_free_graph3(&graph3);

	// Assigning step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	assigning(bdz, &graph3, edges);

	bdz_free_queue(&edges);
	bdz_free_graph3(&graph3);

	// Ranking step
	if (mph->verbosity)
	{
		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
	}
	ranking(bdz);
	#ifdef CMPH_TIMING
	ELAPSED_TIME_IN_SECONDS(&construction_time);
	#endif

	result = (cmph_t *)malloc(sizeof(cmph_t));
	result->algo = mph->algo;
	packed = (bdz_data_t *)malloc(sizeof(bdz_data_t));

	// Pointers move to the new object; the configuration forgets them.
	packed->g = bdz->g;
	bdz->g = NULL;
	packed->hl = bdz->hl;
	bdz->hl = NULL;
	packed->ranktable = bdz->ranktable;
	bdz->ranktable = NULL;

	// Plain values are copied.
	packed->ranktablesize = bdz->ranktablesize;
	packed->k = bdz->k;
	packed->b = bdz->b;
	packed->n = bdz->n;
	packed->m = bdz->m;
	packed->r = bdz->r;

	result->data = packed;
	result->size = bdz->m;

	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}

	#ifdef CMPH_TIMING
	register cmph_uint32 space_usage = bdz_packed_size(result)*8;
	register cmph_uint32 keys_per_bucket = 1;
	construction_time = construction_time - construction_time_begin;
	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
	#endif

	return result;
}
/*
 * Build a BRZ hash function for the keys exposed by mph->key_source.
 *
 * mph: generic configuration; mph->data must point to a brz_config_data_t.
 * c:   algorithm parameter. For CMPH_BMZ8 a value of 0 or >= 2.0 is replaced
 *      by 1; for CMPH_FCH a value <= 2.0 is replaced by 2.6.
 *
 * The keys are partitioned with brz_gen_mphf(), which is retried with a new
 * h0 hash state up to 20 times. The offset table is then derived from the
 * per-bucket sizes.
 *
 * Returns a newly allocated cmph_t that takes ownership of g, h0, h1, h2,
 * size and offset, or NULL when every partitioning attempt fails or an
 * allocation fails. On a NULL return, size, offset and h0 created by this
 * call are released and reset to NULL in the configuration.
 *
 * NOTE(review): after a late allocation failure, whatever brz_gen_mphf()
 * stored in brz->g, brz->h1 and brz->h2 stays attached to the configuration;
 * confirm it is released together with it.
 */
cmph_t *brz_new(cmph_config_t *mph, double c)
{
	cmph_t *mphf = NULL;
	brz_data_t *brzf = NULL;
	cmph_uint32 i;
	cmph_uint32 iterations = 20;
	brz_config_data_t *brz = (brz_config_data_t *)mph->data;

	DEBUGP("c: %f\n", c);
	switch(brz->algo) // validating restrictions over parameter c.
	{
		case CMPH_BMZ8:
			if (c == 0 || c >= 2.0) c = 1;
			break;
		case CMPH_FCH:
			if (c <= 2.0) c = 2.6;
			break;
		default:
			assert(0);
	}
	brz->c = c;
	brz->m = mph->key_source->nkeys;
	DEBUGP("m: %u\n", brz->m);
	brz->k = (cmph_uint32)ceil(brz->m/((double)brz->b));
	DEBUGP("k: %u\n", brz->k);
	brz->size = (cmph_uint8 *) calloc((size_t)brz->k, sizeof(cmph_uint8));
	// A zero-sized table may legitimately be NULL; otherwise NULL means failure.
	if (brz->size == NULL && brz->k > 0)
	{
		return NULL;
	}

	// Clustering the keys by graph id.
	if (mph->verbosity)
	{
		fprintf(stderr, "Partioning the set of keys.\n");
	}

	while(1)
	{
		int ok;
		DEBUGP("hash function 3\n");
		brz->h0 = hash_state_new(brz->hashfuncs[2], brz->k);
		DEBUGP("Generating graphs\n");
		ok = brz_gen_mphf(mph);
		if (!ok)
		{
			--iterations;
			hash_state_destroy(brz->h0);
			brz->h0 = NULL;
			DEBUGP("%u iterations remaining to create the graphs in a external file\n", iterations);
			if (mph->verbosity)
			{
				fprintf(stderr, "Failure: A graph with more than 255 keys was created - %u iterations remaining\n", iterations);
			}
			if (iterations == 0) break;
		}
		else break;
	}
	if (iterations == 0)
	{
		DEBUGP("Graphs with more than 255 keys were created in all 20 iterations\n");
		free(brz->size);
		brz->size = NULL; // do not leave a dangling pointer in the configuration
		return NULL;
	}
	DEBUGP("Graphs generated\n");

	brz->offset = (cmph_uint32 *)calloc((size_t)brz->k, sizeof(cmph_uint32));
	if (brz->offset == NULL && brz->k > 0)
	{
		hash_state_destroy(brz->h0);
		brz->h0 = NULL;
		free(brz->size);
		brz->size = NULL;
		return NULL;
	}
	// offset[i] is the sum of the sizes of all buckets before bucket i.
	for (i = 1; i < brz->k; ++i)
	{
		brz->offset[i] = brz->size[i-1] + brz->offset[i-1];
	}

	// Generating a mphf
	mphf = (cmph_t *)malloc(sizeof(cmph_t));
	brzf = (brz_data_t *)malloc(sizeof(brz_data_t));
	if (mphf == NULL || brzf == NULL)
	{
		free(mphf);
		free(brzf);
		hash_state_destroy(brz->h0);
		brz->h0 = NULL;
		free(brz->offset);
		brz->offset = NULL;
		free(brz->size);
		brz->size = NULL;
		return NULL;
	}

	mphf->algo = mph->algo;
	brzf->g = brz->g;
	brz->g = NULL; //transfer memory ownership
	brzf->h1 = brz->h1;
	brz->h1 = NULL; //transfer memory ownership
	brzf->h2 = brz->h2;
	brz->h2 = NULL; //transfer memory ownership
	brzf->h0 = brz->h0;
	brz->h0 = NULL; //transfer memory ownership
	brzf->size = brz->size;
	brz->size = NULL; //transfer memory ownership
	brzf->offset = brz->offset;
	brz->offset = NULL; //transfer memory ownership
	brzf->k = brz->k;
	brzf->c = brz->c;
	brzf->m = brz->m;
	brzf->algo = brz->algo;
	mphf->data = brzf;
	mphf->size = brz->m;
	DEBUGP("Successfully generated minimal perfect hash\n");
	if (mph->verbosity)
	{
		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
	}
	return mphf;
}