Ejemplo n.º 1
0
void SearchEng::add_parse_page(std::string filename, 
		      PageParser* parser) {
				  
				MySetString currentWords;
				MySetString currentLinks_strings;
			  	parser->parse(filename, currentWords, currentLinks_strings);
				  std::cout << "Just ran parse" << std::endl;
				WebPage* currentPage = new WebPage;
						 
				currentPage->filename( filename );
				currentPage->all_words(currentWords);
				for( MySetString::iterator it = currentLinks_strings.begin(); it != currentLinks_strings.end(); ++it) {
					currentPage->add_outgoing_link(looks_for_webpage(*it));
					
					//Need to deal with links still
					
					/*MySetString::iterator it2 = allPages.find(*it);
					if(it2 != allPages.end() ) {
						currentPage->add_outgoing_link(it);
					}*/
				}
				allPages.insert(currentPage); 
				 // std::cout << "inserted page" << std::endl; //debug
				  MySetString::iterator it;
				for( it = currentWords.begin(); it != currentWords.end(); ++it ) { // makes wordMap;
					std::string word = *it;
					makeLower(word);
					std::map<std::string, MySetWebPage>::iterator it2 = wordMap.find(word);
					//std::cout << "trying to insert: " << word << std::endl; //debug
					if(it2 != wordMap.end()) {
						it2->second.insert(currentPage);
						//std::cout << "added page to " << it2->first << std::endl; //debug
					} else {
						MySetWebPage newSet;
						newSet.insert( currentPage );
						wordMap.insert( make_pair(word, newSet));
						//std::cout << "inserted: " << word << std::endl; //debug
					}
					//std::cout << "went through for loop" << std::endl; //debug
				}  
				
				//deals with all incoming links;
				MySetWebPage::iterator it3;
				for( it3 = allPages.begin(); it3!= allPages.end(); ++it3) {
					WebPage* outgoingPage = *it3;
					MySetWebPage outgoingLinks = outgoingPage->outgoing_links();
					
					MySetWebPage::iterator it4;
					for( it4 = outgoingLinks.begin(); it4 != outgoingLinks.end(); ++it4) {
						WebPage* incomingPage = *it4;
						incomingPage->add_incoming_link(outgoingPage);
					}
				}
				
				//delete currentPage;
				return;
			  }
void SearchEng::add_parse_page(std::string filename, PageParser* parser){
	MySet<std::string> temp_all_words;
	MySet<std::string> temp_all_links;

	parser->parse(filename, temp_all_words, temp_all_links);
	

	if(!map_all_webpages.count(filename) ){//this filename isn't in the map
		//create a new webpage and store a pointer to it in our map
		WebPage* wp = new WebPage(filename);
		map_all_webpages.insert ( std::pair<std::string, WebPage*>(filename, wp) );

		map_all_webpages[filename]->all_words(temp_all_words);//same as wp->all_words(temp_all_words)

		std::set<std::string>::iterator it;
		for(it = temp_all_links.begin(); it != temp_all_links.end(); ++it){
			std::ifstream checkExistence( (*it).c_str() );
			if(!checkExistence.fail() ){
				if(!map_all_webpages.count(*it) ){//add the link to our map if its not in there
					WebPage* wp2 = new WebPage(*it);
					map_all_webpages.insert ( std::pair<std::string, WebPage*>(*it, wp2) );
				}
				wp->add_outgoing_link(map_all_webpages[*it]);
				map_all_webpages[*it]->add_incoming_link(wp);
			}
			if(checkExistence.is_open() ){
				checkExistence.close();
			}
		}


		//fill second map
		std::set<std::string>::iterator it2;
		for(it2 = temp_all_words.begin(); it2 != temp_all_words.end(); ++it2){
			std::string tmp = *it2;
			str_to_lower(tmp);
			if(!word_to_setofwps.count(tmp) ){// curr word not inside map
				
				MySet<WebPage*> set_wp;
				set_wp.insert(wp);
				word_to_setofwps.insert( std::make_pair(tmp, set_wp) );
			}
			else{// curr word is inside map
				word_to_setofwps[tmp].insert(wp);
			}
		}

	}else{//webpage is in the map but hasnt had its member set filled in
		WebPage* wp = map_all_webpages[filename];

		wp->all_words(temp_all_words);

		//populate filename's 
		std::set<std::string>::iterator it;
		for(it = temp_all_links.begin(); it != temp_all_links.end(); ++it){
			std::ifstream checkExistence( (*it).c_str() );
			if(!checkExistence.fail() ){
				if(!map_all_webpages.count(*it) ){//add the link to our map if its not in there
					WebPage* wp2 = new WebPage(*it);
					map_all_webpages.insert ( std::pair<std::string, WebPage*>(*it, wp2) );
				}
				wp->add_outgoing_link(map_all_webpages[*it]);
				map_all_webpages[*it]->add_incoming_link(wp);
			}
			if(checkExistence.is_open() ){
				checkExistence.close();
			}
		}


		//fill second map
		std::set<std::string>::iterator it2;
		for(it2 = temp_all_words.begin(); it2 != temp_all_words.end(); ++it2){
			std::string tmp = *it2;
			str_to_lower(tmp);
			if(!word_to_setofwps.count(tmp) ){// curr word not inside map
				
				MySet<WebPage*> set_wp;
				set_wp.insert(wp);
				word_to_setofwps.insert( std::make_pair(tmp, set_wp) );
			}
			else{// curr word is inside map
				word_to_setofwps[tmp].insert(wp);
			}
		}
	}

	
}