Exemple #1
0
//Records one occurrence for the given url in the occurrences BST.
//When the url already has an entry, addOccurrence() is called on the
//stored Occurrence. Otherwise a new Occurrence is allocated for the
//url, given its first occurrence and inserted under that url.
//NOTE(review): the BST stores the raw pointer created here; the code
//that releases it is not visible in this function - confirm ownership.
//@par url - the url that the occurrence is on
void OccurrenceSet::push(string url){
	BSTNode<string,Occurrence*> * existing = occurrences.Find(url);

	//known url: just bump the entry that is already stored
	if(existing != NULL){
		existing->GetValue()->addOccurrence();
		return;
	}

	//unknown url: create the entry, count this occurrence, then store it
	Occurrence * created = new Occurrence(url);
	created->addOccurrence();
	occurrences.Insert(url,created);
}
	//Merges the per-page word counts in newOccurrences into the words index.
	//For every (word, count) pair the word is inserted into words (the
	//returned node is either a new node or the existing one) and an
	//Occurrence carrying the page url and that count is added to it.
	//@par newOccurrences - BST of (word, count) pairs; must not be NULL,
	//     it is dereferenced without a check
	//@par url - the url the words were found on
	void Crawler::addWords(BST < Pair < string,int > >* newOccurrences, string url){
		//one Occurrence is reused for every word; only its count changes
		Occurrence pageOccurrence;
		pageOccurrence.setURL(url);

		BSTIterator<Pair <string,int> > cursor = newOccurrences->Iterator();
		//placeholder node, overwritten on every iteration
		BSTNode<Pair<string,int> > entry(Pair<string,int>("",-1));

		for(; cursor.hasNext(); ){
			entry = cursor.next();
			//is either a new node or an old node
			BSTNode<Word>* wordNode = words->Insert(Word(entry.GetValue().getFirst()));
			pageOccurrence.setOccurrences(entry.GetValue().getSecond());
			wordNode->GetValue().addOccurrence(pageOccurrence);
		}
	}
// Self-test for WordIndex.
// Inserts the same 30-word list under each of three URLs, then checks that
// the occurrence set for the word "a" contains an entry for urls[1] whose
// URL matches and whose count is 5 ("a" appears five times in the list).
// @param os - output stream; not used directly in this function
//             (presumably consumed by the TEST macro - confirm)
// @return the value of success, which starts out true
//         (presumably cleared by TEST on a failed check - the macro is
//         defined elsewhere)
bool WordIndex::Test (ostream & os) {
  bool success = true;
  const int PAGES = 3;
  const int WORDS = 30;
  string urlStrs[PAGES] = {
      "http://www.google.com/index.html"
    , "file:///home/file.txt"
    , "http://www.msn.com/sports.html"
  };
  URL urls[PAGES] = {
      URL(urlStrs[0])
    , URL(urlStrs[1])
    , URL(urlStrs[2])
  };
  Word words[WORDS] = {
      "and", "the", "a", "wood", "couch", "potato", "Henry", "the", "a", "and"
    , "a", "house", "dog", "wood", "couch", "frisbee", "green", "then", "why", "how"
    , "a", "a", "yes", "no", "maybe", "Henry", "the", "frisbee", "green", "couch"
  };

  WordIndex wordIndex;

  // Every page gets the full word list, so each word's count is the same
  // on every page.
  for (int i = 0; i < PAGES; i++) {
    for (int j = 0; j < WORDS; j++) {
      wordIndex.Insert(words[j], urls[i]);
    }
  }

  OccurrenceSet set = wordIndex.GetValue("a");

  BSTNode<Occurrence>* node = set.Find(Occurrence(urls[1]));
  TEST (NULL != node);

  // Nothing visible here makes TEST leave the function on failure, so the
  // checks that dereference node only run when the lookup succeeded. A
  // missing entry is then reported as a failed test instead of crashing.
  if (NULL != node) {
    Occurrence current = node->GetValue();
    TEST(current.getURL().getFullURL() == urls[1].getFullURL());
    TEST(current.getCount() == 5);
  }

  return success;
}
Exemple #4
0
//Inserts v into the subtree rooted at current_node.
//Descends left while v compares less than the visited node and right
//while it compares greater, and attaches a new node at the first empty
//child slot it reaches. The outcome is reported through the member
//returnValue: the newly created node, or NULL when a node comparing
//equal to v is already in the tree (nothing is inserted in that case).
//size is incremented only when a node is created.
//The descent is written as a loop; the name is kept for existing callers.
//@par current_node - root of the subtree to insert into; must not be
//     NULL, it is dereferenced immediately
//@par v - the occurrence to insert
void OcrBST :: recursive_insert(OcrBSTNode * current_node, const Occurrence & v)
{
	OcrBSTNode * cursor = current_node;

	for(;;)
	{
		const int order = v.compare(cursor->GetValue());

		if(order == 0)
		{
			//equal value already present
			returnValue = NULL;
			return;
		}

		if(order < 0)
		{
			//go left
			if(cursor->GetLeft() == NULL)
			{
				cursor->left = new OcrBSTNode(v);
				size++;
				returnValue = cursor->left;
				return;
			}
			cursor = cursor->GetLeft();
		}
		else
		{
			//go right
			if(cursor->GetRight() == NULL)
			{
				cursor->right = new OcrBSTNode(v);
				size++;
				returnValue = cursor->right;
				return;
			}
			cursor = cursor->GetRight();
		}
	}
}
	//Orders this occurrence against other by url only.
	//Returns whatever url.compare() yields for other's url string
	//(presumably negative / zero / positive for less / equal / greater -
	//confirm against the type of url).
	//@par other - the occurrence to compare against
	int Occurrence::compare(const Occurrence & other)const{
		const int ordering = url.compare(other.getURL().getURLstring());
		return ordering;
	}