/**
 * Pack the hash function held by `o` into a contiguous buffer and write that
 * buffer to `filename`.
 *
 * The file is opened read/write and created if it does not exist; it is not
 * truncated, so bytes beyond the packed size in a pre-existing file are kept.
 *
 * @param o         object whose `hash` member is the function to pack
 * @param filename  path of the file to write
 * @return 0 when every packed byte was written and the file closed cleanly,
 *         -1 on any failure (open, allocation, write or close)
 */
int pack_n_save(struct cmph_hash_obj *o, char *filename)
{
    int rc = -1;
    size_t size;
    void *p;
    int fp;

    /* O_CREAT makes the mode argument mandatory; without it the permission
     * bits of a newly created file are indeterminate. */
    fp = open(filename, O_RDWR | O_CREAT, 0644);
    if (fp < 0) {
        /* Descriptor 0 is valid, so only negative values mean failure. */
        return -1;
    }

    size = cmph_packed_size(o->hash);
    p = malloc(size);
    if (p != NULL) {
        const char *cursor = p;
        size_t remaining = size;

        cmph_pack(o->hash, p);

        /* write() may transfer fewer bytes than requested; keep going until
         * the whole buffer is out or an error stops us. */
        while (remaining > 0) {
            ssize_t written = write(fp, cursor, remaining);
            if (written <= 0) {
                break;
            }
            cursor += written;
            remaining -= (size_t)written;
        }
        if (remaining == 0) {
            rc = 0;
        }
        free(p);
    }

    /* A failing close() can report a deferred write error. */
    if (close(fp) != 0) {
        rc = -1;
    }
    return rc;
}
/**
 * Build a hash function over `items_to_hash` with algorithm `alg_n`, print
 * its packed size and print the value cmph_search() returns for every item.
 *
 * For CMPH_BRZ a file named "<algorithm name>_<items_len>.mph" is handed to
 * the configuration before construction; the finished function is then
 * dumped to that file, destroyed and loaded back before being queried.
 *
 * @param items_to_hash  array of keys, each sizeof(cmph_uint32) bytes wide
 * @param items_len      number of keys in `items_to_hash`
 * @param alg_n          algorithm to use
 * @return 0 on success, -1 if the file could not be opened or the hash
 *         function could not be built or loaded
 */
int test(cmph_uint32* items_to_hash, cmph_uint32 items_len, CMPH_ALGO alg_n)
{
    cmph_t *hash = NULL;
    cmph_config_t *config;
    cmph_io_adapter_t *source;
    cmph_uint32 i;
    char filename[256];
    FILE *mphf_fd = NULL;
    int rc = -1;

    printf("%s (%u)\n", cmph_names[alg_n], alg_n);

    source = cmph_io_struct_vector_adapter(items_to_hash,
                                           (cmph_uint32)sizeof(cmph_uint32),
                                           0,
                                           (cmph_uint32)sizeof(cmph_uint32),
                                           items_len);
    config = cmph_config_new(source);
    cmph_config_set_algo(config, alg_n);

    if (alg_n == CMPH_BRZ) {
        /* Bounded formatting: the name can never overrun `filename`. */
        snprintf(filename, sizeof filename, "%s_%u.mph", cmph_names[alg_n], items_len);
        mphf_fd = fopen(filename, "w");
        if (mphf_fd == NULL) {
            fprintf(stderr, "Unable to open %s for writing\n", filename);
            cmph_config_destroy(config);
            goto out;
        }
        cmph_config_set_mphf_fd(config, mphf_fd);
    }

    hash = cmph_new(config);
    cmph_config_destroy(config);
    if (hash == NULL) {
        fprintf(stderr, "Unable to create hash function\n");
        goto out;
    }

    if (alg_n == CMPH_BRZ) {
        /* Round-trip through the file: dump, drop, reopen, load. */
        cmph_dump(hash, mphf_fd);
        cmph_destroy(hash);
        hash = NULL;
        fclose(mphf_fd);

        mphf_fd = fopen(filename, "r");
        if (mphf_fd == NULL) {
            fprintf(stderr, "Unable to open %s for reading\n", filename);
            goto out;
        }
        hash = cmph_load(mphf_fd);
        if (hash == NULL) {
            fprintf(stderr, "Unable to load hash function from %s\n", filename);
            goto out;
        }
    }

    printf("packed_size %u\n", cmph_packed_size(hash));

    for (i = 0; i < items_len; ++i) {
        /* The key is printed with %d as before; the cast makes the argument
         * type agree with the conversion specifier. */
        printf("%d -> %u\n",
               (int)items_to_hash[i],
               cmph_search(hash, (char*)(items_to_hash + i), (cmph_uint32)sizeof(cmph_uint32)));
    }
    printf("\n");

    rc = 0;

out:
    cmph_io_vector_adapter_destroy(source);
    if (hash != NULL) {
        cmph_destroy(hash);
    }
    if (mphf_fd != NULL) {
        fclose(mphf_fd);
    }
    return rc;
}
/**
 * Build a CHD hash function from the configuration in `mph`.
 *
 * Steps, in order:
 *   1. Build a CHD_PH function from the nested configuration `chd->chd_ph`
 *      (with graph size `c`), pack it into a byte buffer and destroy it.
 *   2. Collect, in increasing order, every bin index i < chd_ph->n for which
 *      GETBIT32(occup_table, i) is zero, and feed that list to a
 *      compressed-rank structure, which is then packed as well.
 *   3. Return a newly allocated cmph_t whose data is a chd_data_t owning both
 *      packed buffers and whose size is chd_ph->m.
 *
 * @param mph  configuration; `mph->data` must be a chd_config_data_t
 * @param c    value forwarded to cmph_config_set_graphsize() and reported as
 *             the load factor in verbose output
 * @return the new hash function, or NULL if the CHD_PH function could not be
 *         built or a memory allocation failed
 */
cmph_t *chd_new(cmph_config_t *mph, double c)
{
    cmph_t *mphf = NULL;
    chd_data_t *chdf = NULL;
    chd_config_data_t *chd = (chd_config_data_t *)mph->data;
    chd_ph_config_data_t *chd_ph = (chd_ph_config_data_t *)chd->chd_ph->data;
    compressed_rank_t cr;

    cmph_t *chd_phf = NULL;
    cmph_uint32 packed_chd_phf_size = 0;
    cmph_uint8 *packed_chd_phf = NULL;

    cmph_uint32 packed_cr_size = 0;
    cmph_uint8 *packed_cr = NULL;

    cmph_uint32 i, idx, nkeys, nvals, nbins;
    cmph_uint32 *vals_table = NULL;
    cmph_uint32 *occup_table = NULL;
#ifdef CMPH_TIMING
    double construction_time_begin = 0.0;
    double construction_time = 0.0;
    ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
#endif

    cmph_config_set_verbosity(chd->chd_ph, mph->verbosity);
    cmph_config_set_graphsize(chd->chd_ph, c);

    if (mph->verbosity) {
        fprintf(stderr, "Generating a CHD_PH perfect hash function with a load factor equal to %.3f\n", c);
    }

    chd_phf = cmph_new(chd->chd_ph);
    if (chd_phf == NULL) {
        return NULL;
    }

    packed_chd_phf_size = cmph_packed_size(chd_phf);
    DEBUGP("packed_chd_phf_size = %u\n", packed_chd_phf_size);

    /* Make sure that we have enough space to pack the mphf. */
    packed_chd_phf = calloc((size_t)packed_chd_phf_size, (size_t)1);
    if (packed_chd_phf == NULL) {
        cmph_destroy(chd_phf);
        return NULL;
    }

    /* Pack the mphf. */
    cmph_pack(chd_phf, packed_chd_phf);
    cmph_destroy(chd_phf);

    if (mph->verbosity) {
        fprintf(stderr, "Compressing the range of the resulting CHD_PH perfect hash function\n");
    }

    nbins = chd_ph->n;
    nkeys = chd_ph->m;
    nvals = nbins - nkeys;

    vals_table = calloc(nvals, sizeof *vals_table);
    /* calloc may legitimately return NULL for a zero-element request, so only
     * a NULL with nvals != 0 is treated as an allocation failure. */
    if (vals_table == NULL && nvals != 0) {
        goto fail;
    }

    /* Record the index of every bin whose occupancy bit is clear. */
    occup_table = (cmph_uint32 *)chd_ph->occup_table;
    for (i = 0, idx = 0; i < nbins; i++) {
        if (!GETBIT32(occup_table, i)) {
            vals_table[idx++] = i;
        }
    }

    /* The rank structure is initialised only once its input exists, so the
     * failure path above has nothing of it to tear down. */
    compressed_rank_init(&cr);
    compressed_rank_generate(&cr, vals_table, nvals);
    free(vals_table);
    vals_table = NULL;

    packed_cr_size = compressed_rank_packed_size(&cr);
    packed_cr = calloc(packed_cr_size, sizeof *packed_cr);
    if (packed_cr == NULL) {
        compressed_rank_destroy(&cr);
        goto fail;
    }
    compressed_rank_pack(&cr, packed_cr);
    compressed_rank_destroy(&cr);

    mphf = malloc(sizeof *mphf);
    chdf = malloc(sizeof *chdf);
    if (mphf == NULL || chdf == NULL) {
        goto fail;
    }

    mphf->algo = mph->algo;

    chdf->packed_cr = packed_cr;
    packed_cr = NULL; /* transfer memory ownership */

    chdf->packed_chd_phf = packed_chd_phf;
    packed_chd_phf = NULL; /* transfer memory ownership */

    chdf->packed_chd_phf_size = packed_chd_phf_size;
    chdf->packed_cr_size = packed_cr_size;

    mphf->data = chdf;
    mphf->size = nkeys;

    DEBUGP("Successfully generated minimal perfect hash\n");
    if (mph->verbosity) {
        fprintf(stderr, "Successfully generated minimal perfect hash function\n");
    }
#ifdef CMPH_TIMING
    ELAPSED_TIME_IN_SECONDS(&construction_time);
    cmph_uint32 space_usage = chd_packed_size(mphf) * 8;
    construction_time = construction_time - construction_time_begin;
    fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", nkeys, c, chd_ph->keys_per_bucket, construction_time, space_usage/(double)nkeys);
#endif

    return mphf;

fail:
    /* Release whatever is still owned locally; free(NULL) is a no-op. */
    free(chdf);
    free(mphf);
    free(packed_cr);
    free(packed_chd_phf);
    return NULL;
}