// Adds every word of `wb` to the index under `url`.
//
// Returns false, leaving the index untouched, when `url` is already present
// in urlToId (i.e. incorporate() was called before with the same url) or when
// `wb` yields no first word. Otherwise the URL is given a fresh random ID, the
// URL <-> ID mapping is recorded in both directions, one (ID, count) Pair per
// word is appended to that word's entry in wordToIdCounts, and true is
// returned.
//
// Requirement carried over from the assignment text: incorporating a WordBag
// with W distinct words into an index that already holds N words must take
// far less than O(WN) time, so adding a single word mapping must take far
// less than O(N). This function does one map lookup per word (plus one
// insertion for a word not yet indexed); whether that meets the bound depends
// on the map's find/associate, which are not visible here.
bool IndexerImpl::incorporate(string url, WordBag& wb)
{
    // A URL may be incorporated only once.
    if (urlToId.find(url) != nullptr) return false;

    string word;
    int count;
    bool haveWord = wb.getFirstWord(word, count);
    // Nothing to index: bail out before an ID is reserved for this URL.
    if (!haveWord) return false;

    // Draw random IDs in [0, 100000) until one is not yet in idToUrl.
    // NOTE(review): this loop cannot terminate once all 100000 IDs are taken.
    int ID = rand() % 100000;
    while (idToUrl.find(ID) != nullptr) { ID = rand() % 100000; }
    urlToId.associate(url, ID); // url -> ID
    idToUrl.associate(ID, url); // ID -> url

    while (haveWord) {
        // Look the word up once and reuse the result for both branches.
        vector<Pair>* postings = wordToIdCounts.find(word);
        if (postings != nullptr) {
            // Word already indexed: append to the vector find() pointed at.
            postings->push_back(Pair(ID, count));
        }
        else {
            // First occurrence of this word: start its list with this URL.
            vector<Pair> fresh;
            fresh.push_back(Pair(ID, count));
            wordToIdCounts.associate(word, fresh);
        }
        haveWord = wb.getNextWord(word, count);
    }
    return true;
}
Example #2
0
// Adds every word of `wb` to the index under `url`.
//
// Returns false when the hash produced for `url` is already recorded in its
// idToUrl bucket (the URL is treated as previously incorporated); otherwise
// records the URL, appends one point(hash, count) per lower-cased word to
// urlToCount, and returns true.
//
// NOTE(review): the duplicate check compares hashes only, so two different
// URLs for which hashurl yields the same hash would be treated as the same
// URL -- confirm against hashurl's guarantees.
bool IndexerImpl::incorporate(string url, WordBag& wb)
{
    int hash;
    // hashurl returns the bucket index; `hash` is handed over uninitialised
    // and read afterwards, so hashurl is expected to fill it in.
    int modhash = hashurl(url, hash);

    // Create the bucket's map lazily on first use.
    if (idToUrl[modhash] == nullptr)
        idToUrl[modhash] = new MyMap<int,string>;

    // This URL hash has already been passed to incorporate.
    if (idToUrl[modhash]->find(hash) != nullptr)
        return false;

    // Remember the bucket index the first time the bucket gets an entry.
    if (idToUrl[modhash]->size() == 0)
        indexes.push_back(modhash);
    idToUrl[modhash]->associate(hash, url);

    string word;
    int count;
    // One pass over the distinct words in the bag.
    bool gotAWord = wb.getFirstWord(word, count);
    while (gotAWord)
    {
        // Indexing is case-insensitive.
        strToLower(word);
        point bucket = point(hash, count);

        vector<point>* postings = urlToCount->find(word);
        if (postings != nullptr)
        {
            // Word already indexed: append to the vector find() pointed at.
            // No second associate() call is needed for the update.
            postings->push_back(bucket);
        }
        else
        {
            // New word: build its list on the stack so nothing is leaked;
            // associate() receives it by reference and keeps its own entry.
            vector<point> fresh;
            fresh.push_back(bucket);
            urlToCount->associate(word, fresh);
        }
        gotAWord = wb.getNextWord(word, count);
    }
    // One lookup (plus at most one insertion) per word: O(W log N) overall,
    // taking the original author's O(log N) figure for the map at face value.
    return true;
}