void IndexData::reassignKeywordIds() { map<TrieNode *, unsigned> trieNodeIdMapper; // this->trie->reassignKeywordIds(trieNodeIdMapper); // Generating an ID mapper by iterating through the set of trie nodes whose // ids need to be reassigned // a map from temperory id to new ids, this map is used for changing forwardIndex and quadTree map<unsigned, unsigned> keywordIdMapper; for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin(); iter != trieNodeIdMapper.end(); ++iter) { TrieNode *node = iter->first; unsigned newKeywordId = iter->second; keywordIdMapper[node->getId()] = newKeywordId; node->setId(newKeywordId); // set the new keyword Id } map<unsigned, unsigned> processedRecordIds; // keep track of records that have been converted // Now we have the ID mapper. We want to go through the trie nodes one by one. // For each of them, access its inverted list. For each record, // use the id mapper to change the integers on the forward list. changeKeywordIdsOnForwardLists(trieNodeIdMapper, keywordIdMapper, processedRecordIds); // apply the ID mapper on the keyword ids of empty leaf nodes this->trie->applyKeywordIdMapperOnEmptyLeafNodes(keywordIdMapper); }
void test2_ThreadSafe() { Trie *trie1 = new Trie(); unsigned invertedIndexOffset; //"Tom Smith and Jack Lennon come Yesterday Once More" trie1->addKeyword("tom", invertedIndexOffset); trie1->addKeyword("smith", invertedIndexOffset); trie1->addKeyword("and", invertedIndexOffset); trie1->addKeyword("jack", invertedIndexOffset); trie1->addKeyword("lennon", invertedIndexOffset); trie1->addKeyword("come", invertedIndexOffset); trie1->addKeyword("yesterday", invertedIndexOffset); trie1->addKeyword("once", invertedIndexOffset); trie1->addKeyword("more", invertedIndexOffset); /* record->setAttributeValue(0, "George Harris"); record->setAttributeValue(1, "Here comes the sun"); */ trie1->addKeyword("george", invertedIndexOffset); trie1->addKeyword("harris", invertedIndexOffset); trie1->addKeyword("here", invertedIndexOffset); trie1->addKeyword("comes", invertedIndexOffset); trie1->addKeyword("the", invertedIndexOffset); trie1->addKeyword("sun", invertedIndexOffset); /* record->setAttributeValue(0, "Pink Floyd"); record->setAttributeValue(1, "Shine on you crazy diamond"); */ trie1->addKeyword("pink", invertedIndexOffset); trie1->addKeyword("floyd", invertedIndexOffset); trie1->addKeyword("shine", invertedIndexOffset); trie1->addKeyword("on", invertedIndexOffset); trie1->addKeyword("you", invertedIndexOffset); trie1->addKeyword("crazy", invertedIndexOffset); trie1->addKeyword("diamond", invertedIndexOffset); /* record->setAttributeValue(0, "Uriah Hepp"); record->setAttributeValue(1, "Come Shine away Melinda "); */ trie1->addKeyword("uriah", invertedIndexOffset); trie1->addKeyword("hepp", invertedIndexOffset); trie1->addKeyword("come", invertedIndexOffset); trie1->addKeyword("shine", invertedIndexOffset); trie1->addKeyword("away", invertedIndexOffset); trie1->addKeyword("melinda", invertedIndexOffset); /* record->setAttributeValue(0, "Pinksyponzi Floydsyponzi"); record->setAttributeValue(1, "Shinesyponzi on - Wish you were here"); */ trie1->addKeyword("pinksyponzi", invertedIndexOffset); trie1->addKeyword("floydsyponzi", invertedIndexOffset); trie1->addKeyword("shinesyponzi", invertedIndexOffset); trie1->addKeyword("on", invertedIndexOffset); trie1->addKeyword("wish", invertedIndexOffset); trie1->addKeyword("you", invertedIndexOffset); trie1->addKeyword("were", invertedIndexOffset); trie1->addKeyword("here", invertedIndexOffset); /* record->setAttributeValue(0, "U2 2345 Pink"); record->setAttributeValue(1, "with or without you"); */ trie1->addKeyword("u2", invertedIndexOffset); trie1->addKeyword("2345", invertedIndexOffset); trie1->addKeyword("pink", invertedIndexOffset); trie1->addKeyword("with", invertedIndexOffset); trie1->addKeyword("or", invertedIndexOffset); trie1->addKeyword("without", invertedIndexOffset); trie1->addKeyword("you", invertedIndexOffset); /* record->setAttributeValue(0, "Led Zepplelin"); record->setAttributeValue(1, "Stairway to Heaven pink floyd"); */ trie1->addKeyword("led", invertedIndexOffset); trie1->addKeyword("zepplelin", invertedIndexOffset); trie1->addKeyword("stairway", invertedIndexOffset); trie1->addKeyword("to", invertedIndexOffset); trie1->addKeyword("heaven", invertedIndexOffset); trie1->addKeyword("pink", invertedIndexOffset); trie1->addKeyword("floyd", invertedIndexOffset); /* record->setAttributeValue(0, "Jimi Hendrix"); record->setAttributeValue(1, "Little wing"); */ trie1->addKeyword("jimi", invertedIndexOffset); trie1->addKeyword("hendrix", invertedIndexOffset); trie1->addKeyword("little", invertedIndexOffset); trie1->addKeyword("wing", invertedIndexOffset); trie1->commit(); trie1->finalCommit_finalizeHistogramInformation(NULL, NULL, 0); trie1->print_Trie(); cout<<"\nBefore Commit:" << std::endl; trie1->addKeyword_ThreadSafe("winger", invertedIndexOffset); trie1->addKeyword_ThreadSafe("wing", invertedIndexOffset); trie1->addKeyword_ThreadSafe("aaaa", invertedIndexOffset); trie1->addKeyword_ThreadSafe("qqqq", invertedIndexOffset); trie1->addKeyword_ThreadSafe("zzzz", invertedIndexOffset); trie1->addKeyword_ThreadSafe("steve", invertedIndexOffset); trie1->addKeyword_ThreadSafe("jobs", invertedIndexOffset); trie1->addKeyword_ThreadSafe("stanford", invertedIndexOffset); trie1->addKeyword_ThreadSafe("speech", invertedIndexOffset); trie1->addKeyword_ThreadSafe("000000000000000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("00000000000000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("0000000000000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("00000000000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("0000000000000000022", invertedIndexOffset); trie1->addKeyword_ThreadSafe("0000000000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("000000000000022", invertedIndexOffset); trie1->addKeyword_ThreadSafe("00000000000", invertedIndexOffset); trie1->addKeyword_ThreadSafe("0000000000088", invertedIndexOffset); map<TrieNode *, unsigned> trieNodeIdMapper; trie1->reassignKeywordIds(trieNodeIdMapper); for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin(); iter != trieNodeIdMapper.end(); ++ iter) { TrieNode *node = iter->first; unsigned newKeywordId = iter->second; //std::cout << "ForwardIndex:reassign, " << node->getId() << " -> " << newKeywordId << std::endl; node->setId(newKeywordId); // set the new keyword Id } trie1->merge(NULL, NULL, 0, false); cout<<"\nAfter Commit and Update:\n" << std::endl; trie1->print_Trie(); delete trie1; }