Example #1
0
void WordIndex::Insert (const Word & word, const URL & url) {
  bool containsWord = Map<Word, OccurrenceSet>::Contains(word);

  if (true == containsWord) {
    // The word is already in this index -- increment occurrence
    OccurrenceSet dummySet;
    MapNode<Word, OccurrenceSet> mapNode(word, dummySet);
    BSTNode< MapNode<Word, OccurrenceSet> >* node = 
      BST< MapNode<Word, OccurrenceSet> >::Find(mapNode);

    Occurrence wrapper(url);
    BSTNode<Occurrence> * oNode = node->GetValue().GetValue().Find(wrapper);

    if (NULL != oNode) {
      // word occurred on a known web page
      oNode->GetValue().increment();
    } else {
      // word has an occurrence on a new web page
      bool wasInserted = node->GetValue().GetValue().Insert(wrapper);
      assert(wasInserted == true);
    }
  } else {
    // We need to add the word to this index
    OccurrenceSet set;
    Occurrence occurrence(url);
    bool wasAdded = set.Insert(occurrence);
    assert(wasAdded == true);

    Map<Word, OccurrenceSet>::Insert(word, set);
  }
}
Example #2
0
	// Folds the (word, count) pairs in `newOccurrences` into the crawler's
	// `words` collection, attributing every count to the page `url`.
	//
	// For each pair the word is passed to words->Insert(), and an Occurrence
	// carrying `url` and the pair's count is handed to addOccurrence() on the
	// node that Insert() returns.
	void Crawler::addWords(BST < Pair < string,int > >* newOccurrences, string url){
		// Scratch node that receives each element of the traversal in turn.
		BSTNode<Pair<string,int> > current(Pair<string,int>("",-1));

		// One Occurrence object is reused for every word: the URL stays fixed
		// and only the count is refreshed per iteration.
		Occurrence pageOccurrence;
		pageOccurrence.setURL(url);

		for (BSTIterator<Pair <string,int> > cursor = newOccurrences->Iterator();
		     cursor.hasNext(); ) {
			current = cursor.next();

			// Insert() is relied on to return the node for this word whether it
			// was just created or was already present.
			BSTNode<Word>* wordNode =
				words->Insert(Word(current.GetValue().getFirst()));

			int hits = current.GetValue().getSecond();
			pageOccurrence.setOccurrences(hits);
			wordNode->GetValue().addOccurrence(pageOccurrence);
		}
	}
Example #3
0
// Self-test for WordIndex.
//
// Inserts the same 30-word list once for each of three URLs, then looks up
// the occurrence set of the word "a" and checks that the entry for the second
// URL exists, carries that URL, and has a count of 5 ("a" appears five times
// in the word list).
//
// Parameters:
//   os - output stream; not referenced directly in this function (the TEST
//        macro, defined elsewhere, may use it -- TODO confirm).
// Returns:
//   the value of `success` after all checks, or false if the expected
//   occurrence cannot be found at all.
bool WordIndex::Test (ostream & os) {
  bool success = true;
  const int PAGES = 3;
  const int WORDS = 30;
  string urlStrs[PAGES] = {
      "http://www.google.com/index.html"
    , "file:///home/file.txt"
    , "http://www.msn.com/sports.html"
  };
  URL urls[PAGES] = {
      URL(urlStrs[0])
    , URL(urlStrs[1])
    , URL(urlStrs[2])
  };
  Word words[WORDS] = {
      "and", "the", "a", "wood", "couch", "potato", "Henry", "the", "a", "and"
    , "a", "house", "dog", "wood", "couch", "frisbee", "green", "then", "why", "how"
    , "a", "a", "yes", "no", "maybe", "Henry", "the", "frisbee", "green", "couch"
  };

  WordIndex wordIndex;

  // Every page receives the full word list, so each word ends up with one
  // occurrence entry per URL.
  for (int i = 0; i < PAGES; i++) {
    for (int j = 0; j < WORDS; j++) {
      wordIndex.Insert(words[j], urls[i]);
    }
  }

  OccurrenceSet set = wordIndex.GetValue("a");

  BSTNode<Occurrence>* node = set.Find(Occurrence(urls[1]));
  TEST (NULL != node);
  if (NULL == node) {
    // The remaining checks all read through `node`; dereferencing a null
    // pointer would crash the test run instead of reporting a failure.
    return false;
  }

  Occurrence current = node->GetValue();
  TEST(current.getURL().getFullURL() == urls[1].getFullURL());
  TEST(current.getCount() == 5);

  return success;
}