// updates the index with a given word // either adds it or increments the count of an existing document // or adds a new document to an existing word int updateIndex(char *wordIn, int idIn, IndexTable *tableIn) { // null word, invalid doc id, or nonexistent index if (!wordIn || idIn < 0 || !tableIn) { if (1 == DEBUG) { printf("updateIndex received invalid params\n"); } return 1; // failed } int location = JenkinsHash(wordIn, MAX_HASH_SLOT); WordNode *node = tableIn->table[location]; // word not in index if (!tableIn->table[location]->word) { if (1 == DEBUG) { printf("Word %s not found in index, adding it\n", wordIn); } tableIn->table[location] = newWordNode(wordIn, idIn); return 0; } // word already in index, search through list to find its node while (1) { if (strcmp(node->word, wordIn) == 0) { // found the WordNode containing wordIn if (1 == DEBUG) { printf("Found WordNode for \"%s\"\n", wordIn); } DocumentNode *doc = node->page; while (1) { if (doc->docId == idIn) { // found the DocumentNode for this doc/word combo if (1 == DEBUG) { printf("Found DocumentNode for id=%d for \"%s\"\n", idIn, wordIn); } doc->freq++; return 0; } if (!doc->next) { break; } doc = doc->next; } // made it to the end without finding a matching DocumentNode if (1 == DEBUG) { printf("No DocumentNode for id=%d for \"%s\"\n", idIn, wordIn); } DocumentNode *newDoc = newDocumentNode(idIn); doc->next = newDoc; return 0; } if (!node->next) { break; } node = node->next; } WordNode *newWord = newWordNode(wordIn, idIn); node->next = newWord; return 0; }
// Test case: TestLookUp:5 // This test calls lookUp() for the condition where // the query has both AND and OR int TestLookUp5() { START_TEST_CASE; INVERTED_INDEX* testIndex = NULL; int wordHash; int wordHash2; int wordHash3; int wordHash4; testIndex = initStructure(testIndex); wordHash = hash1("dog") % MAX_NUMBER_OF_SLOTS; DocumentNode* docNode = NULL; docNode = newDocNode(docNode, 15, 1); WordNode* wordNode = NULL; wordNode = newWordNode(wordNode, docNode, "dog"); testIndex->hash[wordHash] = wordNode; wordHash2 = hash1("cat") % MAX_NUMBER_OF_SLOTS; DocumentNode* docNode2 = NULL; docNode2 = newDocNode(docNode2, 15, 2); WordNode* wordNode2 = NULL; wordNode2 = newWordNode(wordNode2, docNode2, "cat"); testIndex->hash[wordHash2] = wordNode2; wordHash3 = hash1("mouse") % MAX_NUMBER_OF_SLOTS; DocumentNode* docNode3 = NULL; docNode3 = newDocNode(docNode3, 23, 2); WordNode* wordNode3 = NULL; wordNode3 = newWordNode(wordNode3, docNode3, "mouse"); testIndex->hash[wordHash3] = wordNode3; wordHash4 = hash1("lion") % MAX_NUMBER_OF_SLOTS; DocumentNode* docNode4 = NULL; docNode4 = newDocNode(docNode4, 23, 2); WordNode* wordNode4 = NULL; wordNode4 = newWordNode(wordNode4, docNode4, "lion"); testIndex->hash[wordHash4] = wordNode4; char query[1000] = "dog cat OR mouse lion"; sanitize(query); char* temp[1000]; BZERO(temp, 1000); char* queryList[1000]; BZERO(queryList, 1000); curateWords(queryList, query); DocumentNode* saved[1000]; BZERO(saved, 1000); lookUp(saved, queryList, testIndex); SHOULD_BE(saved[0]->document_id == docNode->document_id); SHOULD_BE(saved[0]->page_word_frequency == 3); SHOULD_BE(saved[1]->document_id == docNode3->document_id); SHOULD_BE(saved[1]->page_word_frequency == 4); cleanUpList(saved); cleanUpQueryList(queryList); BZERO(saved, 1000); cleanUpIndex(testIndex); END_TEST_CASE; }