//Creates an occurrence, or increments the //count of an occurrence on the class BST //@par url - the url that the occurrence is on void OccurrenceSet::push(string url){ BSTNode<string,Occurrence*> * node = occurrences.Find(url); if(node==NULL){ Occurrence * occurrence = new Occurrence(url); occurrence->addOccurrence(); occurrences.Insert(url,occurrence); }else{ node->GetValue()->addOccurrence(); } }
//Folds the (word, count) pairs gathered for one page into the crawler's
//word index.
//@par newOccurrences - BST of (word, count) pairs to merge
//@par url - the url the pairs were collected from
void Crawler::addWords(BST < Pair < string,int > >* newOccurrences, string url){
    BSTIterator<Pair <string,int> > cursor = newOccurrences->Iterator();

    //placeholder node; overwritten by every call to cursor.next()
    BSTNode<Pair<string,int> > entry(Pair<string,int>("",-1));

    //a single Occurrence is reused for every word: the url stays fixed and
    //only the count is updated per word
    Occurrence pageOccurrence;
    pageOccurrence.setURL(url);

    while(cursor.hasNext()){
        entry = cursor.next();

        //per the original author's note, Insert yields either a new node
        //or the already-existing node for this word
        BSTNode<Word>* indexed = words->Insert(Word(entry.GetValue().getFirst()));

        pageOccurrence.setOccurrences(entry.GetValue().getSecond());
        indexed->GetValue().addOccurrence(pageOccurrence);
    }
}
// Self-test for WordIndex.
// Inserts the same 30-word list once for each of three URLs, then looks up
// the word "a" and checks that the occurrence recorded for the second URL
// carries that URL and a count of 5 ("a" appears five times in the list).
// @param os - output stream; not referenced directly in this function
//             (NOTE(review): presumably used by the TEST macro - confirm)
// @return true when every check passed, false otherwise
bool WordIndex::Test (ostream & os) {
    bool success = true;

    const int PAGES = 3;
    const int WORDS = 30;

    string urlStrs[PAGES] = {
        "http://www.google.com/index.html"
        , "file:///home/file.txt"
        , "http://www.msn.com/sports.html"
    };
    URL urls[PAGES] = {
        URL(urlStrs[0])
        , URL(urlStrs[1])
        , URL(urlStrs[2])
    };

    Word words[WORDS] = {
        "and", "the", "a", "wood", "couch", "potato", "Henry", "the", "a", "and"
        , "a", "house", "dog", "wood", "couch", "frisbee", "green", "then", "why", "how"
        , "a", "a", "yes", "no", "maybe", "Henry", "the", "frisbee", "green", "couch"
    };

    WordIndex wordIndex;

    // Every page receives the complete word list
    for (int i = 0; i < PAGES; i++) {
        for (int j = 0; j < WORDS; j++) {
            wordIndex.Insert(words[j], urls[i]);
        }
    }

    OccurrenceSet set = wordIndex.GetValue("a");

    BSTNode<Occurrence>* node = set.Find(Occurrence(urls[1]));
    TEST (NULL != node);
    if (NULL == node) {
        // The lookup failed, so there is nothing to inspect. Report failure
        // instead of dereferencing a null pointer in the checks below.
        return false;
    }

    Occurrence current = node->GetValue();
    TEST(current.getURL().getFullURL() == urls[1].getFullURL());
    TEST(current.getCount() == 5);

    return success;
}
// Inserts v into the subtree rooted at current_node, descending by the
// result of v.compare() against each visited node's value.
// The outcome is reported through returnValue (declared outside this
// function): it is set to the newly created node on success, or to NULL
// when a node comparing equal to v is found. size is incremented only
// when a node is created.
// current_node is dereferenced unconditionally, so it must not be NULL.
void OcrBST :: recursive_insert(OcrBSTNode * current_node, const Occurrence & v)
{
    const int order = v.compare(current_node->GetValue());

    if(order == 0)
    {
        // equal value already in the tree: nothing inserted
        returnValue = NULL;
        return;
    }

    if(order < 0)
    {
        // v sorts before this node: belongs in the left subtree
        if(current_node->GetLeft() != NULL)
        {
            recursive_insert(current_node->GetLeft(), v);
            return;
        }
        current_node->left = new OcrBSTNode(v);
        size++;
        returnValue = current_node->left;
        return;
    }

    // v sorts after this node: belongs in the right subtree
    if(current_node->GetRight() != NULL)
    {
        recursive_insert(current_node->GetRight(), v);
        return;
    }
    current_node->right = new OcrBSTNode(v);
    size++;
    returnValue = current_node->right;
}
// Orders this occurrence relative to another one by url.
// The result is whatever this object's url.compare() yields when given the
// other occurrence's url string (other.getURL().getURLstring()).
// @param other - the occurrence to compare against
// @return the value produced by url.compare()
int Occurrence::compare(const Occurrence & other) const
{
    const int ordering = url.compare(other.getURL().getURLstring());
    return ordering;
}