// The user calls the incorporate() method to add all the words in the provided
// WordBag argument to the index. If incorporate() has previously been called with
// the same url argument, it returns false and does nothing. Otherwise, it updates
// the index by adding all the words and returns true. Incorporating a WordBag
// containing W distinct words into an index that already contains N words must take
// far less than O(WN) time, because adding one word mapping (e.g., "fun" →
// { "www.b.com", 1 }) to an index that already contains N words must take far less
// than O(N) time.
bool IndexerImpl::incorporate(string url, WordBag& wb) {
    // If this url has already been incorporated, do nothing.
    if (urlToId.find(url) != nullptr)
        return false;

    string word;
    int retrieveCount;
    bool getCount = wb.getFirstWord(word, retrieveCount);
    if (!getCount)
        return false;

    // Generate an ID not already in use, then add it to both maps.
    int ID = rand() % 100000;
    while (idToUrl.find(ID) != nullptr)
        ID = rand() % 100000;
    urlToId.associate(url, ID);  // add to urlToId map
    idToUrl.associate(ID, url);  // add to idToUrl map

    // Map each word to a vector of (ID, count) pairs.
    while (getCount) {
        vector<Pair>* x = wordToIdCounts.find(word);
        if (x != nullptr) {
            // The word is already indexed: append to its existing vector.
            x->push_back(Pair(ID, retrieveCount));
        } else {
            // The word is not yet indexed: map it to a new one-element vector.
            vector<Pair> y;
            y.push_back(Pair(ID, retrieveCount));
            wordToIdCounts.associate(word, y);
        }
        getCount = wb.getNextWord(word, retrieveCount);
    }
    return true;
}
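// A minimal usage sketch, not part of the submission above. It only assumes the
// IndexerImpl and WordBag names already used in this file; the helper function name
// incorporateExample, the pre-built WordBag argument, and the URL are illustrative
// assumptions. It shows the contract described in the comment above: the first
// incorporate() call for a url returns true, and a repeat call returns false and
// leaves the index unchanged.
void incorporateExample(IndexerImpl& index, WordBag& wb) {
    bool added      = index.incorporate("www.b.com", wb);  // true: url is new
    bool addedAgain = index.incorporate("www.b.com", wb);  // false: already indexed
}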
bool IndexerImpl::incorporate(string url, WordBag& wb) {
    int hash;
    // hashurl() generates a hash of url as well as an index for linear probing.
    int modhash = hashurl(url, hash);

    // If the head bucket of the array is not initialized, initialize it.
    if (idToUrl[modhash] == NULL)
        idToUrl[modhash] = new MyMap<int, string>;

    // Check whether this url has already been passed to incorporate.
    if (idToUrl[modhash]->find(hash) != NULL)
        return false;

    // Add url to the hash-to-url map, remembering which buckets are in use.
    if (idToUrl[modhash]->size() == 0)
        indexes.push_back(modhash);
    idToUrl[modhash]->associate(hash, url);

    string word;
    int count;

    // Iterate through the W distinct words in the WordBag: O(W).
    bool gotAWord = wb.getFirstWord(word, count);
    while (gotAWord) {
        // Account for case-insensitive indexing.
        strToLower(word);
        point bucket = point(hash, count);

        // Check whether the word already exists in the index: O(log N).
        vector<point>* temp = urlToCount->find(word);
        if (temp != NULL) {
            // The word is already indexed: append to its existing vector.
            temp->push_back(bucket);
            urlToCount->associate(word, *temp);
        } else {
            // The word is not yet indexed: map it to a new one-element vector
            // (a local vector avoids leaking a heap-allocated temporary).
            vector<point> fresh;
            fresh.push_back(bucket);
            urlToCount->associate(word, fresh);
        }
        gotAWord = wb.getNextWord(word, count);
    }
    // Final big-O is O(W log N), which is far less than O(WN).
    return true;
}
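// A minimal sketch of the case-folding helper assumed above: strToLower() is called
// for case-insensitive indexing but is not shown in this excerpt, so this version is
// an assumption for illustration. It lowercases each character of the word in place.
#include <cctype>
#include <string>

static void strToLower(std::string& word) {
    for (size_t i = 0; i < word.size(); i++)
        word[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(word[i])));
}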