/** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf); * \brief Return the amount of space needed to pack mphf. * \param mphf pointer to a mphf * \return the size of the packed function or zero for failures */ cmph_uint32 cmph_packed_size(cmph_t *mphf) { switch(mphf->algo) { case CMPH_CHM: return chm_packed_size(mphf); case CMPH_BMZ: /* included -- Fabiano */ return bmz_packed_size(mphf); case CMPH_BMZ8: /* included -- Fabiano */ return bmz8_packed_size(mphf); case CMPH_BRZ: /* included -- Fabiano */ return brz_packed_size(mphf); case CMPH_FCH: /* included -- Fabiano */ return fch_packed_size(mphf); case CMPH_BDZ: /* included -- Fabiano */ return bdz_packed_size(mphf); case CMPH_BDZ_PH: /* included -- Fabiano */ return bdz_ph_packed_size(mphf); case CMPH_CHD_PH: /* included -- Fabiano */ return chd_ph_packed_size(mphf); case CMPH_CHD: /* included -- Fabiano */ return chd_packed_size(mphf); default: assert(0); } return 0; // FAILURE }
cmph_t *chd_ph_new(cmph_config_t *mph, double c) { cmph_t *mphf = NULL; chd_ph_data_t *chd_phf = NULL; chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)mph->data; register double load_factor = c; register cmph_uint8 searching_success = 0; register cmph_uint32 max_probes = 1 << 20; // default value for max_probes register cmph_uint32 iterations = 100; chd_ph_bucket_t * buckets = NULL; chd_ph_item_t * items = NULL; register cmph_uint8 failure = 0; cmph_uint32 max_bucket_size = 0; chd_ph_sorted_list_t * sorted_lists = NULL; cmph_uint32 * disp_table = NULL; register double space_lower_bound = 0; #ifdef CMPH_TIMING double construction_time_begin = 0.0; double construction_time = 0.0; ELAPSED_TIME_IN_SECONDS(&construction_time_begin); #endif chd_ph->m = mph->key_source->nkeys; DEBUGP("m = %u\n", chd_ph->m); chd_ph->nbuckets = (cmph_uint32)(chd_ph->m/chd_ph->keys_per_bucket) + 1; DEBUGP("nbuckets = %u\n", chd_ph->nbuckets); if(load_factor < 0.5 ) { load_factor = 0.5; } if(load_factor >= 0.99) { load_factor = 0.99; } DEBUGP("load_factor = %.3f\n", load_factor); chd_ph->n = (cmph_uint32)(chd_ph->m/(chd_ph->keys_per_bin * load_factor)) + 1; //Round the number of bins to the prime immediately above if(chd_ph->n % 2 == 0) chd_ph->n++; for(;;) { if(check_primality(chd_ph->n) == 1) break; chd_ph->n += 2; // just odd numbers can be primes for n > 2 }; DEBUGP("n = %u \n", chd_ph->n); if(chd_ph->keys_per_bin == 1) { space_lower_bound = chd_ph_space_lower_bound(chd_ph->m, chd_ph->n); } if(mph->verbosity) { fprintf(stderr, "space lower bound is %.3f bits per key\n", space_lower_bound); } // We allocate the working tables buckets = chd_ph_bucket_new(chd_ph->nbuckets); items = (chd_ph_item_t *) calloc(chd_ph->m, sizeof(chd_ph_item_t)); max_probes = (cmph_uint32)(((log(chd_ph->m)/log(2))/20) * max_probes); if(chd_ph->keys_per_bin == 1) chd_ph->occup_table = (cmph_uint8 *) calloc(((chd_ph->n + 31)/32), sizeof(cmph_uint32)); else chd_ph->occup_table = (cmph_uint8 *) calloc(chd_ph->n, sizeof(cmph_uint8)); disp_table = (cmph_uint32 *) calloc(chd_ph->nbuckets, sizeof(cmph_uint32)); // // init_genrand(time(0)); while(1) { iterations --; if (mph->verbosity) { fprintf(stderr, "Starting mapping step for mph creation of %u keys with %u bins\n", chd_ph->m, chd_ph->n); } if(!chd_ph_mapping(mph, buckets, items, &max_bucket_size)) { if (mph->verbosity) { fprintf(stderr, "Failure in mapping step\n"); } failure = 1; goto cleanup; } if (mph->verbosity) { fprintf(stderr, "Starting ordering step\n"); } if(sorted_lists) { free(sorted_lists); } sorted_lists = chd_ph_ordering(&buckets, &items, chd_ph->nbuckets, chd_ph->m, max_bucket_size); if (mph->verbosity) { fprintf(stderr, "Starting searching step\n"); } searching_success = chd_ph_searching(chd_ph, buckets, items, max_bucket_size, sorted_lists, max_probes, disp_table); if(searching_success) break; // reset occup_table if(chd_ph->keys_per_bin > 1) memset(chd_ph->occup_table, 0, chd_ph->n); else memset(chd_ph->occup_table, 0, ((chd_ph->n + 31)/32) * sizeof(cmph_uint32)); if(iterations == 0) { // Cleanup memory if (mph->verbosity) { fprintf(stderr, "Failure because the max trials was exceeded\n"); } failure = 1; goto cleanup; }; } #ifdef DEBUG { if(!chd_ph_check_bin_hashing(chd_ph, buckets, items, disp_table,sorted_lists,max_bucket_size)) { DEBUGP("Error for bin packing generation"); failure = 1; goto cleanup; } } #endif if (mph->verbosity) { fprintf(stderr, "Starting compressing step\n"); } if(chd_ph->cs) { free(chd_ph->cs); } chd_ph->cs = (compressed_seq_t *) calloc(1, sizeof(compressed_seq_t)); compressed_seq_init(chd_ph->cs); compressed_seq_generate(chd_ph->cs, disp_table, chd_ph->nbuckets); #ifdef CMPH_TIMING ELAPSED_TIME_IN_SECONDS(&construction_time); register double entropy = chd_ph_get_entropy(disp_table, chd_ph->nbuckets, max_probes); DEBUGP("Entropy = %.4f\n", entropy/chd_ph->m); #endif cleanup: chd_ph_bucket_destroy(buckets); free(items); free(sorted_lists); free(disp_table); if(failure) { if(chd_ph->hl) { hash_state_destroy(chd_ph->hl); } chd_ph->hl = NULL; return NULL; } mphf = (cmph_t *)malloc(sizeof(cmph_t)); mphf->algo = mph->algo; chd_phf = (chd_ph_data_t *)malloc(sizeof(chd_ph_data_t)); chd_phf->cs = chd_ph->cs; chd_ph->cs = NULL; //transfer memory ownership chd_phf->hl = chd_ph->hl; chd_ph->hl = NULL; //transfer memory ownership chd_phf->n = chd_ph->n; chd_phf->nbuckets = chd_ph->nbuckets; mphf->data = chd_phf; mphf->size = chd_ph->n; DEBUGP("Successfully generated minimal perfect hash\n"); if (mph->verbosity) { fprintf(stderr, "Successfully generated minimal perfect hash function\n"); } #ifdef CMPH_TIMING register cmph_uint32 space_usage = chd_ph_packed_size(mphf)*8; construction_time = construction_time - construction_time_begin; fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\t%.4f\t%.4f\n", chd_ph->m, load_factor, chd_ph->keys_per_bucket, construction_time, space_usage/(double)chd_ph->m, space_lower_bound, entropy/chd_ph->m); #endif return mphf; }