void WordIndex::Insert (const Word & word, const URL & url) { bool containsWord = Map<Word, OccurrenceSet>::Contains(word); if (true == containsWord) { // The word is already in this index -- increment occurrence OccurrenceSet dummySet; MapNode<Word, OccurrenceSet> mapNode(word, dummySet); BSTNode< MapNode<Word, OccurrenceSet> >* node = BST< MapNode<Word, OccurrenceSet> >::Find(mapNode); Occurrence wrapper(url); BSTNode<Occurrence> * oNode = node->GetValue().GetValue().Find(wrapper); if (NULL != oNode) { // word occurred on a known web page oNode->GetValue().increment(); } else { // word has an occurrence on a new web page bool wasInserted = node->GetValue().GetValue().Insert(wrapper); assert(wasInserted == true); } } else { // We need to add the word to this index OccurrenceSet set; Occurrence occurrence(url); bool wasAdded = set.Insert(occurrence); assert(wasAdded == true); Map<Word, OccurrenceSet>::Insert(word, set); } }
// Folds the word counts gathered from one page into the crawler's `words`
// collection.
//
// newOccurrences: tree of (word text, count) pairs; must not be NULL, since it
//                 is dereferenced unconditionally.
// url:            URL recorded on every occurrence that is added.
//
// For each pair, the word is inserted into `words` and an Occurrence carrying
// `url` and the pair's count is attached to the node that Insert returns.
// NOTE(review): this relies on words->Insert() returning a usable node both
// for new and for already-present words -- confirm against the BST in use.
void Crawler::addWords(BST < Pair < string,int > >* newOccurrences, string url){
    // One Occurrence is reused for every word; only its count changes per pass.
    Occurrence pageOccurrence;
    pageOccurrence.setURL(url);

    // Placeholder node that is overwritten by each element the iterator yields.
    BSTNode<Pair<string,int> > current(Pair<string,int>("",-1));

    for (BSTIterator<Pair <string,int> > it = newOccurrences->Iterator(); it.hasNext(); ) {
        current = it.next();

        // Insert hands back the node for this word (new or pre-existing).
        Word key(current.GetValue().getFirst());
        BSTNode<Word>* wordNode = words->Insert(key);

        pageOccurrence.setOccurrences(current.GetValue().getSecond());
        wordNode->GetValue().addOccurrence(pageOccurrence);
    }
}
// Self-test for WordIndex.
//
// Inserts the same 30-word list once for each of three URLs, then checks that
// the occurrence recorded for the word "a" on the second URL exists, carries
// that URL, and has a count of 5 ("a" appears five times in the word list).
//
// os: not referenced directly in this function; presumably the TEST macro
//     reports failures to it -- confirm against the macro definition.
//
// Returns true when every check passes, false otherwise.
bool WordIndex::Test (ostream & os) {
    bool success = true;

    const int PAGES = 3;
    const int WORDS = 30;

    string urlStrs[PAGES] = {
        "http://www.google.com/index.html"
        , "file:///home/file.txt"
        , "http://www.msn.com/sports.html"
    };
    URL urls[PAGES] = {
        URL(urlStrs[0])
        , URL(urlStrs[1])
        , URL(urlStrs[2])
    };
    Word words[WORDS] = {
        "and", "the", "a", "wood", "couch", "potato", "Henry", "the", "a", "and"
        , "a", "house", "dog", "wood", "couch", "frisbee", "green", "then", "why", "how"
        , "a", "a", "yes", "no", "maybe", "Henry", "the", "frisbee", "green", "couch"
    };

    // Every word is inserted once per page, so each page ends up with the
    // same per-word counts.
    WordIndex wordIndex;
    for (int i = 0; i < PAGES; i++) {
        for (int j = 0; j < WORDS; j++) {
            wordIndex.Insert(words[j], urls[i]);
        }
    }

    OccurrenceSet set = wordIndex.GetValue("a");
    BSTNode<Occurrence>* node = set.Find(Occurrence(urls[1]));
    TEST (NULL != node);
    if (NULL == node) {
        // The remaining checks dereference node; without this guard a failed
        // lookup would crash the test run instead of reporting a failure.
        return false;
    }

    Occurrence current = node->GetValue();
    TEST(current.getURL().getFullURL() == urls[1].getFullURL());
    TEST(current.getCount() == 5);

    return success;
}