void SearchEng::add_parse_page(std::string filename, PageParser* parser) { MySetString currentWords; MySetString currentLinks_strings; parser->parse(filename, currentWords, currentLinks_strings); std::cout << "Just ran parse" << std::endl; WebPage* currentPage = new WebPage; currentPage->filename( filename ); currentPage->all_words(currentWords); for( MySetString::iterator it = currentLinks_strings.begin(); it != currentLinks_strings.end(); ++it) { currentPage->add_outgoing_link(looks_for_webpage(*it)); //Need to deal with links still /*MySetString::iterator it2 = allPages.find(*it); if(it2 != allPages.end() ) { currentPage->add_outgoing_link(it); }*/ } allPages.insert(currentPage); // std::cout << "inserted page" << std::endl; //debug MySetString::iterator it; for( it = currentWords.begin(); it != currentWords.end(); ++it ) { // makes wordMap; std::string word = *it; makeLower(word); std::map<std::string, MySetWebPage>::iterator it2 = wordMap.find(word); //std::cout << "trying to insert: " << word << std::endl; //debug if(it2 != wordMap.end()) { it2->second.insert(currentPage); //std::cout << "added page to " << it2->first << std::endl; //debug } else { MySetWebPage newSet; newSet.insert( currentPage ); wordMap.insert( make_pair(word, newSet)); //std::cout << "inserted: " << word << std::endl; //debug } //std::cout << "went through for loop" << std::endl; //debug } //deals with all incoming links; MySetWebPage::iterator it3; for( it3 = allPages.begin(); it3!= allPages.end(); ++it3) { WebPage* outgoingPage = *it3; MySetWebPage outgoingLinks = outgoingPage->outgoing_links(); MySetWebPage::iterator it4; for( it4 = outgoingLinks.begin(); it4 != outgoingLinks.end(); ++it4) { WebPage* incomingPage = *it4; incomingPage->add_incoming_link(outgoingPage); } } //delete currentPage; return; }
void SearchEng::add_parse_page(std::string filename, PageParser* parser){ MySet<std::string> temp_all_words; MySet<std::string> temp_all_links; parser->parse(filename, temp_all_words, temp_all_links); if(!map_all_webpages.count(filename) ){//this filename isn't in the map //create a new webpage and store a pointer to it in our map WebPage* wp = new WebPage(filename); map_all_webpages.insert ( std::pair<std::string, WebPage*>(filename, wp) ); map_all_webpages[filename]->all_words(temp_all_words);//same as wp->all_words(temp_all_words) std::set<std::string>::iterator it; for(it = temp_all_links.begin(); it != temp_all_links.end(); ++it){ std::ifstream checkExistence( (*it).c_str() ); if(!checkExistence.fail() ){ if(!map_all_webpages.count(*it) ){//add the link to our map if its not in there WebPage* wp2 = new WebPage(*it); map_all_webpages.insert ( std::pair<std::string, WebPage*>(*it, wp2) ); } wp->add_outgoing_link(map_all_webpages[*it]); map_all_webpages[*it]->add_incoming_link(wp); } if(checkExistence.is_open() ){ checkExistence.close(); } } //fill second map std::set<std::string>::iterator it2; for(it2 = temp_all_words.begin(); it2 != temp_all_words.end(); ++it2){ std::string tmp = *it2; str_to_lower(tmp); if(!word_to_setofwps.count(tmp) ){// curr word not inside map MySet<WebPage*> set_wp; set_wp.insert(wp); word_to_setofwps.insert( std::make_pair(tmp, set_wp) ); } else{// curr word is inside map word_to_setofwps[tmp].insert(wp); } } }else{//webpage is in the map but hasnt had its member set filled in WebPage* wp = map_all_webpages[filename]; wp->all_words(temp_all_words); //populate filename's std::set<std::string>::iterator it; for(it = temp_all_links.begin(); it != temp_all_links.end(); ++it){ std::ifstream checkExistence( (*it).c_str() ); if(!checkExistence.fail() ){ if(!map_all_webpages.count(*it) ){//add the link to our map if its not in there WebPage* wp2 = new WebPage(*it); map_all_webpages.insert ( std::pair<std::string, WebPage*>(*it, wp2) ); } wp->add_outgoing_link(map_all_webpages[*it]); map_all_webpages[*it]->add_incoming_link(wp); } if(checkExistence.is_open() ){ checkExistence.close(); } } //fill second map std::set<std::string>::iterator it2; for(it2 = temp_all_words.begin(); it2 != temp_all_words.end(); ++it2){ std::string tmp = *it2; str_to_lower(tmp); if(!word_to_setofwps.count(tmp) ){// curr word not inside map MySet<WebPage*> set_wp; set_wp.insert(wp); word_to_setofwps.insert( std::make_pair(tmp, set_wp) ); } else{// curr word is inside map word_to_setofwps[tmp].insert(wp); } } } }