// Records that `word` occurs at offset `pos`.
//
// The table `tab` is keyed by the FNVHash64 of the word's bytes; each value
// is a WordPositions record holding a heap copy of the word and a linked
// list of positions. The position is stored directly inside the list
// payload (cast through intptr_t) rather than in separately malloc'ed
// memory.
//
// NOTE(review): lookup is by hash only, so two different words with the
// same 64-bit hash would share a single record -- confirm this is
// acceptable to callers.
//
// Arguments:
//  - tab:  the word -> WordPositions table to update.
//  - word: NUL-terminated word; it is copied, not retained.
//  - pos:  the position to record for this occurrence.
static void AddToHashtable(HashTable tab, char *word, DocPositionOffset_t pos) {
  size_t word_len = strlen(word);
  HTKey_t word_hash = FNVHash64((unsigned char *) word, word_len);
  HTKeyValue entry;

  // Word already present in the table: just append the new position.
  if (LookupHashTable(tab, word_hash, &entry) == 1) {
    WordPositions *existing = (WordPositions *) entry.value;
    int appended = AppendLinkedList(existing->positions,
                                    (LLPayload_t) ((intptr_t) pos));
    Verify333(appended != 0);
    return;
  }

  // First sighting of this word: build a fresh WordPositions record.
  WordPositions *record = (WordPositions *) malloc(sizeof(WordPositions));
  Verify333(record != NULL);

  // Private copy of the word, including its terminating NUL byte.
  char *word_copy = (char *) malloc(word_len + 1);
  Verify333(word_copy != NULL);
  memcpy(word_copy, word, word_len + 1);
  record->word = word_copy;

  // Start the position list with this occurrence.
  record->positions = AllocateLinkedList();
  Verify333(record->positions != NULL);
  bool appended = AppendLinkedList(record->positions,
                                   (LLPayload_t) ((intptr_t) pos));
  Verify333(appended);

  // Publish the record under the word's hash. The lookup above missed,
  // so the insert is required to return 1.
  HTKeyValue displaced;
  entry.key = word_hash;
  entry.value = record;
  int inserted = InsertHashTable(tab, entry, &displaced);
  Verify333(inserted == 1);
}
// Looks up the document name stored for `docid`.
//
// Arguments:
//  - table: the DocTable to search; must be non-NULL.
//  - docid: the document ID to look up; must be non-zero.
//
// Returns the (char *) stored as the value for `docid` in the table's
// docid_to_docname hash table, or NULL if that key is not present. The
// returned pointer is the one held by the table, not a copy.
char *DTLookupDocID(DocTable table, DocID_t docid) {
  Verify333(table != NULL);
  Verify333(docid != 0);

  HTKeyValue match;
  int found = LookupHashTable(table->docid_to_docname, (HTKey_t)docid, &match);

  return (found == 1) ? (char*)match.value : NULL;
}
// Looks up the document ID stored for `docname`.
//
// The docname_to_docid table is keyed by the FNVHash64 of the name's
// bytes, and its values are read here as pointers to DocID_t.
//
// Arguments:
//  - table:   the DocTable to search; must be non-NULL.
//  - docname: NUL-terminated document name; must be non-NULL.
//
// Returns the stored DocID_t if the name's hash is present in the table,
// or 0 if it is not.
DocID_t DTLookupDocumentName(DocTable table, char *docname) {
  Verify333(table != NULL);
  Verify333(docname != NULL);

  HTKey_t name_hash = FNVHash64((unsigned char*)docname, strlen(docname));
  HTKeyValue match;

  if (LookupHashTable(table->docname_to_docid, name_hash, &match) == 1) {
    return *(DocID_t*)match.value;
  }
  return 0;
}
// our main function; here, we demonstrate how to use some // of the hash table functions int main(int argc, char **argv) { ExampleValuePtr evp; HashTable ht; HTIter iter; HTKeyValue kv, old_kv; HTKey_t i; // allocate a hash table with 10,000 initial buckets ht = AllocateHashTable(10000); Verify333(ht != NULL); // insert 20,000 elements (load factor = 2.0) for (i = 0; i < 20000; i++) { evp = (ExampleValuePtr) malloc(sizeof(ExampleValue)); Verify333(evp != NULL); evp->num = i; // make sure HT has the right # of elements in it to start Verify333(NumElementsInHashTable(ht) == (HWSize_t) i); // insert a new element kv.key = FNVHashInt64((HTValue_t)i); kv.value = (HTValue_t)evp; Verify333(InsertHashTable(ht, kv, &old_kv) == 1); // make sure hash table has right # of elements post-insert Verify333(NumElementsInHashTable(ht) == (HWSize_t) (i+1)); } // look up a few values Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)100)); Verify333(((ExampleValuePtr) kv.value)->num == 100); Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)18583), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)18583)); Verify333(((ExampleValuePtr) kv.value)->num == 18583); // make sure non-existent value cannot be found Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)20000), &kv) == 0); // delete a value Verify333(RemoveFromHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)100)); Verify333(((ExampleValuePtr) kv.value)->num == 100); ExampleValueFree(kv.value); // since we malloc'ed it, we must free it // make sure it's deleted Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 0); Verify333(NumElementsInHashTable(ht) == (HWSize_t) 19999); // loop through using an iterator i = 0; iter = HashTableMakeIterator(ht); Verify333(iter != NULL); while (HTIteratorPastEnd(iter) == 0) { Verify333(HTIteratorGet(iter, &kv) == 1); Verify333(kv.key != 
FNVHashInt64((HTValue_t)100)); // we deleted it // advance the iterator HTIteratorNext(iter); i++; } Verify333(i == 19999); // free the iterator HTIteratorFree(iter); // free the hash table FreeHashTable(ht, &ExampleValueFree); return 0; }