/**
 * Runs `steps` rounds of the rank update over `pages`, writing progress to
 * standard output.
 *
 * Each round has two passes:
 *   1. For every page, add up PR / (outgoing link count + 1) over its
 *      incoming links and over the page itself (every page is treated as
 *      having a self loop). The sums are stored by filename so that all of
 *      them are computed from the previous round's PR values.
 *   2. For every page, store the new value (1 - e) * sum + e / pages.size()
 *      through setPR(), print it, and add it to the round's total.
 *
 * @param pages  pages whose PR values are read and overwritten
 * @param steps  number of rounds to run; nothing happens when it is <= 0
 */
void pageRankHelper(myset<WebPage*>& pages, int steps) {
  // Weight of the uniform e / pages.size() term in the update.
  // TODO: make e a variable in config.
  const double e = 0.15;

  for (int i = 0; i < steps; i++) {
    cout << "STEP " << i+1 << endl;

    // Pass 1: per-page sums for this round, keyed by filename.
    map<string,double> temp;
    for (set<WebPage*>::iterator it = pages.begin(); it != pages.end(); ++it) {
      double sum = 0;

      // Bound by const reference so the link set is not copied again when
      // incoming_links() already hands back a reference.
      const set<WebPage*>& in = (*it)->incoming_links();
      for (set<WebPage*>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) {
        // "+ 1" accounts for the self loop every node is given.
        sum += (*it2)->PR()/((*it2)->outgoing_links().size() + 1);
      }

      // The page counts itself as both an incoming and an outgoing link.
      sum += (*it)->PR()/((*it)->outgoing_links().size() + 1);

      temp.insert(pair<string,double>((*it)->filename(), sum));
    }

    // Pass 2: write the new PR of every page and report it.
    double total = 0;
    for (set<WebPage*>::iterator it = pages.begin(); it != pages.end(); ++it) {
      map<string,double>::iterator found = temp.find((*it)->filename());
      if (found == temp.end()) {
        // Report and skip: dereferencing end() would be undefined behaviour.
        cout << "CANNOT FIND" << endl;
        continue;
      }

      double pr = (1-e)*found->second + e/pages.size();
      (*it)->setPR(pr);
      cout << " " << (*it)->filename() << " " << pr << endl;
      total += pr;
    }
    cout << " " << total << endl;
  }
}
/**
 * Gives every page in `pages` the same starting rank, 1.0 / pages.size().
 *
 * `steps` and `configFile` are accepted but not used by this body: the call
 * that would run the rounds (pageRankHelper) is currently commented out.
 */
void pageRank(myset<WebPage*>& pages, int steps, char* configFile) {
  set<WebPage*>::iterator page = pages.begin();
  while (page != pages.end()) {
    WebPage* current = *page;
    current->setPR(1.0/pages.size());
    ++page;
  }
  //pageRankHelper(pages, steps);
}
/** * Updates the set containing all unique words in the text */ void WebPage::all_words(const myset<std::string> & words){ for(myset<std::string> ::iterator it = words.begin(); it != words.end(); ++it){ allWords.insert(*it); } }