/* * This is an example program that reads words from a text-file (a * book or something like that) and uses those as keys in a hash table * (the actual data stored is unimportant). The words are case * sensitive. * * After this, the program opens another text-file and tries to match * the words in that with the words stored in the table. * * The meaning with this program is to test the speed of the table and * to provide an example of its use. */ int main(int argc, char *argv[]) { FILE *p_file; char *p_dict_name; char *p_text_name; ght_hash_table_t *p_table; ght_iterator_t iterator; struct stat stat_struct; int i_found; int i_cnt; char *p_buf; char *p_tok; const void *p_key; void *p_e; /* Create a new hash table */ if ( !(p_table = ght_create(1000)) ) { return 1; } ght_set_rehash(p_table, TRUE); /* Parse the arguments */ if (argc < 3) { printf("Usage: dict_example [-m|-t|-b] dictfile textfile\n\n" "Reads words from `dictfile' and looks up these words in `textfile'.\n" "Options:\n" " -m Use move-to-front heuristics\n" " -t Use transpose heuristics\n" " -b Use bounded buckets (use the hash table as a cache)\n" ); return 0; } else if (argc > 3) { if(strcmp(argv[1], "-m") == 0) ght_set_heuristics(p_table, GHT_HEURISTICS_MOVE_TO_FRONT); else if(strcmp(argv[1], "-t") == 0) ght_set_heuristics(p_table, GHT_HEURISTICS_TRANSPOSE); else if (strcmp(argv[1], "-b") == 0) { /* Rehashing makes no sense in "cache" mode */ ght_set_rehash(p_table, FALSE); ght_set_bounded_buckets(p_table, 3, bucket_free_callback); } p_dict_name = argv[2]; p_text_name = argv[3]; } else { /* 2 arguments */ p_dict_name = argv[1]; p_text_name = argv[2]; } /* Open the dictionary file (first check its size) */ if (stat(p_dict_name, &stat_struct) < 0) { perror("stat"); return 1; } if (!(p_buf = (char*)malloc(stat_struct.st_size))) { perror("malloc"); return 1; } /* Open the dictionary file and read that into the buffer. */ if (! 
(p_file = fopen(p_dict_name, "r")) ) { perror("fopen"); return 1; } fread(p_buf, sizeof(char), stat_struct.st_size, p_file); fclose(p_file); /* For each word in the dictionary file, insert it into the hash table. */ p_tok = strtok(p_buf, DELIMS); i_cnt = 0; i_found = 0; while (p_tok) { int *p_data; if ( !(p_data = (int*)malloc(sizeof(int))) ) { perror("malloc"); return 1; } *p_data = i_cnt++; /* Insert the word into the table */ if (ght_insert(p_table, p_data, strlen(p_tok), p_tok) < 0) free(p_data); /* Could not insert the item (already exists), free it. */ else i_found++; p_tok = strtok(NULL, DELIMS); } printf("Done reading %d unique words from the wordlist.\n" "Total number of words is %d.\n\n", i_found, i_cnt); free(p_buf); /* Check the size of the text file. */ if (stat(p_text_name, &stat_struct) < 0) { perror("stat"); return 1; } if (!(p_buf = (char*)malloc(stat_struct.st_size))) { perror("malloc"); return 1; } /* Open the text file and read that into the buffer. */ if (! (p_file = fopen(p_text_name, "r")) ) { perror("fopen"); return 1; } fread(p_buf, sizeof(char), stat_struct.st_size, p_file); fclose(p_file); /* For each word in the text file, check if it exists in the hash table. */ p_tok = strtok(p_buf, DELIMS); i_cnt = 0; i_found = 0; while (p_tok) { printf(" searching %s ;", p_tok); if (ght_get(p_table, strlen(p_tok), p_tok)) { i_found++; } i_cnt++; p_tok = strtok(NULL, DELIMS); } free(p_buf); printf("Found %d words out of %d words\n", i_found, i_cnt); /* Free the entry data in the table */ for(p_e = ght_first(p_table, &iterator, &p_key); p_e; p_e = ght_next(p_table, &iterator, &p_key)) { free(p_e); } /* Free the table */ ght_finalize(p_table); return 0; }
/*
 * Rehash the hash table (i.e. change its size and reinsert all
 * items). This operation is slow and should not be used frequently.
 *
 * p_ht:   the table to rehash, must not be NULL.
 * i_size: the size handed to ght_create() for the replacement table.
 *
 * A temporary table is created with the hash and allocation functions
 * of p_ht, every entry is inserted into it, the old buckets are
 * released and p_ht finally takes over the storage of the temporary
 * table. If the temporary table cannot be created, an error is logged
 * and p_ht is left untouched.
 */
void ght_rehash(ght_hash_table_t *p_ht, unsigned int i_size)
{
  ght_hash_table_t *p_tmp;
  ght_iterator_t iterator;
  const void *p_key;
  void *p;
  unsigned int i;

  DEBUG_ASSERT(p_ht);

  /* Recreate the hash table with the new size. This is checked at
   * run time: going on with a NULL table would dereference it in the
   * ght_set_*() calls below. */
  p_tmp = ght_create(i_size);
  if (!p_tmp)
    {
      LOG_ERROR("Out of memory error when creating the new table for rehashing\n");
      return;
    }

  /* Set the flags for the new hash table */
  ght_set_hash(p_tmp, p_ht->fn_hash);
  ght_set_alloc(p_tmp, p_ht->fn_alloc, p_ht->fn_free);
  /* No heuristics and no (nested) rehashing while the temporary
   * table is being filled. */
  ght_set_heuristics(p_tmp, GHT_HEURISTICS_NONE);
  ght_set_rehash(p_tmp, FALSE);

  /* Walk through all elements in the table and insert them into the temporary one. */
  for (p = ght_first(p_ht, &iterator, &p_key); p;
       p = ght_next(p_ht, &iterator, &p_key))
    {
      DEBUG_ASSERT(iterator.p_entry);

      /* Insert the entry into the new table. A failure is only logged,
       * the entry is then missing from the rehashed table. */
      if (ght_insert(p_tmp,
                     iterator.p_entry->p_data,
                     iterator.p_entry->key.i_size,
                     iterator.p_entry->key.p_key) < 0)
        {
          LOG_ERROR("Out of memory error or entry already in hash table when rehashing (internal error)\n");
        }
    }

  /* Remove the old table... */
  for (i = 0; i < p_ht->i_size; i++)
    {
      if (p_ht->pp_entries[i])
        {
          /* Delete the entries in the bucket */
          free_entry_chain(p_ht, p_ht->pp_entries[i]);
          p_ht->pp_entries[i] = NULL;
        }
    }
  DefaultFreeFunction(p_ht->pp_entries);
  DefaultFreeFunction(p_ht->p_nr);

  /* ... and replace it with the new */
  p_ht->i_size = p_tmp->i_size;
  p_ht->i_size_mask = p_tmp->i_size_mask;
  p_ht->i_items = p_tmp->i_items;
  p_ht->pp_entries = p_tmp->pp_entries;
  p_ht->p_nr = p_tmp->p_nr;
  p_ht->p_oldest = p_tmp->p_oldest;
  p_ht->p_newest = p_tmp->p_newest;

  /* Clean up: the arrays now belong to p_ht, so detach them from the
   * temporary table before releasing the table structure itself. */
  p_tmp->pp_entries = NULL;
  p_tmp->p_nr = NULL;
  DefaultFreeFunction(p_tmp);
}