void IndexData::reassignKeywordIds() { map<TrieNode *, unsigned> trieNodeIdMapper; // this->trie->reassignKeywordIds(trieNodeIdMapper); // Generating an ID mapper by iterating through the set of trie nodes whose // ids need to be reassigned // a map from temperory id to new ids, this map is used for changing forwardIndex and quadTree map<unsigned, unsigned> keywordIdMapper; for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin(); iter != trieNodeIdMapper.end(); ++iter) { TrieNode *node = iter->first; unsigned newKeywordId = iter->second; keywordIdMapper[node->getId()] = newKeywordId; node->setId(newKeywordId); // set the new keyword Id } map<unsigned, unsigned> processedRecordIds; // keep track of records that have been converted // Now we have the ID mapper. We want to go through the trie nodes one by one. // For each of them, access its inverted list. For each record, // use the id mapper to change the integers on the forward list. changeKeywordIdsOnForwardLists(trieNodeIdMapper, keywordIdMapper, processedRecordIds); // apply the ID mapper on the keyword ids of empty leaf nodes this->trie->applyKeywordIdMapperOnEmptyLeafNodes(keywordIdMapper); }
/* * Uses the id mapped to replace old ids to new ids in forward list. * since we use inverted index to go through all records of a keyword it is possible to visit a record more than once * so we use processedRecordIds to remember what records have been reassigned. */ void IndexData::changeKeywordIdsOnForwardLists( const map<TrieNode *, unsigned> &trieNodeIdMapper, const map<unsigned, unsigned> &keywordIdMapper, map<unsigned, unsigned> &processedRecordIds) { vectorview<unsigned>* &keywordIDsWriteView = this->invertedIndex->getKeywordIds()->getWriteView(); shared_ptr<vectorview<ForwardListPtr> > forwardListDirectoryReadView; this->forwardIndex->getForwardListDirectory_ReadView( forwardListDirectoryReadView); for (map<TrieNode *, unsigned>::const_iterator iter = trieNodeIdMapper.begin(); iter != trieNodeIdMapper.end(); ++iter) { TrieNode *node = iter->first; // the following code is based on TermVirtualList.cpp unsigned invertedListId = node->getInvertedListOffset(); // change the keywordId for a given invertedListId. "node" (leafnode) has a new keywordId keywordIDsWriteView->at(invertedListId) = node->getId(); // Since it happens after the commit of other index structures it uses read view shared_ptr<vectorview<unsigned> > readview; shared_ptr<vectorview<InvertedListContainerPtr> > invertedListDirectoryReadView; this->invertedIndex->getInvertedIndexDirectory_ReadView( invertedListDirectoryReadView); this->invertedIndex->getInvertedListReadView( invertedListDirectoryReadView, invertedListId, readview); unsigned invertedListSize = readview->size(); // go through each record id on the inverted list InvertedListElement invertedListElement; for (unsigned i = 0; i < invertedListSize; i++) { /*if (invertedListElement == NULL) continue;*/ unsigned recordId = readview->getElement(i); // re-map it only it is not done before if (processedRecordIds.find(recordId) == processedRecordIds.end()) { this->forwardIndex->reassignKeywordIds( forwardListDirectoryReadView, recordId, keywordIdMapper); processedRecordIds[recordId] = 0; // add it to the set } } } }