Пример #1
0
void IndexData::reassignKeywordIds() {
	map<TrieNode *, unsigned> trieNodeIdMapper; //
	this->trie->reassignKeywordIds(trieNodeIdMapper);

	// Generating an ID mapper by iterating through the set of trie nodes whose
	// ids need to be reassigned
	// a map from temperory id to new ids, this map is used for changing forwardIndex and quadTree
	map<unsigned, unsigned> keywordIdMapper;
	for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin();
			iter != trieNodeIdMapper.end(); ++iter) {
		TrieNode *node = iter->first;
		unsigned newKeywordId = iter->second;

		keywordIdMapper[node->getId()] = newKeywordId;

		node->setId(newKeywordId); // set the new keyword Id
	}

	map<unsigned, unsigned> processedRecordIds; // keep track of records that have been converted

	// Now we have the ID mapper.  We want to go through the trie nodes one by one.
	// For each of them, access its inverted list.  For each record,
	// use the id mapper to change the integers on the forward list.
	changeKeywordIdsOnForwardLists(trieNodeIdMapper, keywordIdMapper,
			processedRecordIds);

	// apply the ID mapper on the keyword ids of empty leaf nodes
	this->trie->applyKeywordIdMapperOnEmptyLeafNodes(keywordIdMapper);
}
Пример #2
0
void test2_ThreadSafe()
{
    Trie *trie1 = new Trie();

    unsigned invertedIndexOffset;

    //"Tom Smith and Jack Lennon come Yesterday Once More"
    trie1->addKeyword("tom", invertedIndexOffset);
    trie1->addKeyword("smith", invertedIndexOffset);
    trie1->addKeyword("and", invertedIndexOffset);
    trie1->addKeyword("jack", invertedIndexOffset);
    trie1->addKeyword("lennon", invertedIndexOffset);
    trie1->addKeyword("come", invertedIndexOffset);
    trie1->addKeyword("yesterday", invertedIndexOffset);
    trie1->addKeyword("once", invertedIndexOffset);
    trie1->addKeyword("more", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "George Harris");
    record->setAttributeValue(1, "Here comes the sun");
     */
    trie1->addKeyword("george", invertedIndexOffset);
    trie1->addKeyword("harris", invertedIndexOffset);
    trie1->addKeyword("here", invertedIndexOffset);
    trie1->addKeyword("comes", invertedIndexOffset);
    trie1->addKeyword("the", invertedIndexOffset);
    trie1->addKeyword("sun", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "Pink Floyd");
    record->setAttributeValue(1, "Shine on you crazy diamond");
     */
    trie1->addKeyword("pink", invertedIndexOffset);
    trie1->addKeyword("floyd", invertedIndexOffset);
    trie1->addKeyword("shine", invertedIndexOffset);
    trie1->addKeyword("on", invertedIndexOffset);
    trie1->addKeyword("you", invertedIndexOffset);
    trie1->addKeyword("crazy", invertedIndexOffset);
    trie1->addKeyword("diamond", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "Uriah Hepp");
    record->setAttributeValue(1, "Come Shine away Melinda ");
     */
    trie1->addKeyword("uriah", invertedIndexOffset);
    trie1->addKeyword("hepp", invertedIndexOffset);
    trie1->addKeyword("come", invertedIndexOffset);
    trie1->addKeyword("shine", invertedIndexOffset);
    trie1->addKeyword("away", invertedIndexOffset);
    trie1->addKeyword("melinda", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "Pinksyponzi Floydsyponzi");
    record->setAttributeValue(1, "Shinesyponzi on - Wish you were here");
     */
    trie1->addKeyword("pinksyponzi", invertedIndexOffset);
    trie1->addKeyword("floydsyponzi", invertedIndexOffset);
    trie1->addKeyword("shinesyponzi", invertedIndexOffset);
    trie1->addKeyword("on", invertedIndexOffset);
    trie1->addKeyword("wish", invertedIndexOffset);
    trie1->addKeyword("you", invertedIndexOffset);
    trie1->addKeyword("were", invertedIndexOffset);
    trie1->addKeyword("here", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "U2 2345 Pink");
    record->setAttributeValue(1, "with or without you");
     */
    trie1->addKeyword("u2", invertedIndexOffset);
    trie1->addKeyword("2345", invertedIndexOffset);
    trie1->addKeyword("pink", invertedIndexOffset);
    trie1->addKeyword("with", invertedIndexOffset);
    trie1->addKeyword("or", invertedIndexOffset);
    trie1->addKeyword("without", invertedIndexOffset);
    trie1->addKeyword("you", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "Led Zepplelin");
    record->setAttributeValue(1, "Stairway to Heaven pink floyd");
     */
    trie1->addKeyword("led", invertedIndexOffset);
    trie1->addKeyword("zepplelin", invertedIndexOffset);
    trie1->addKeyword("stairway", invertedIndexOffset);
    trie1->addKeyword("to", invertedIndexOffset);
    trie1->addKeyword("heaven", invertedIndexOffset);
    trie1->addKeyword("pink", invertedIndexOffset);
    trie1->addKeyword("floyd", invertedIndexOffset);

    /*
    record->setAttributeValue(0, "Jimi Hendrix");
    record->setAttributeValue(1, "Little wing");
     */
    trie1->addKeyword("jimi", invertedIndexOffset);
    trie1->addKeyword("hendrix", invertedIndexOffset);
    trie1->addKeyword("little", invertedIndexOffset);
    trie1->addKeyword("wing", invertedIndexOffset);
    trie1->commit();
    trie1->finalCommit_finalizeHistogramInformation(NULL, NULL, 0);
    trie1->print_Trie();

    cout<<"\nBefore Commit:" << std::endl;

    trie1->addKeyword_ThreadSafe("winger", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("wing", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("aaaa", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("qqqq", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("zzzz", invertedIndexOffset);

    trie1->addKeyword_ThreadSafe("steve", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("jobs", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("stanford", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("speech", invertedIndexOffset);

    trie1->addKeyword_ThreadSafe("000000000000000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("00000000000000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("0000000000000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("00000000000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("0000000000000000022", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("0000000000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("000000000000022", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("00000000000", invertedIndexOffset);
    trie1->addKeyword_ThreadSafe("0000000000088", invertedIndexOffset);

    map<TrieNode *, unsigned> trieNodeIdMapper;
    trie1->reassignKeywordIds(trieNodeIdMapper);
    for (map<TrieNode *, unsigned>::iterator iter = trieNodeIdMapper.begin();
         iter != trieNodeIdMapper.end(); ++ iter)
    {
         TrieNode *node = iter->first;
         unsigned newKeywordId = iter->second;
         //std::cout << "ForwardIndex:reassign, " << node->getId() << " -> " << newKeywordId << std::endl;
         node->setId(newKeywordId); // set the new keyword Id
    }

    trie1->merge(NULL, NULL, 0, false);
    cout<<"\nAfter Commit and Update:\n" << std::endl;

    trie1->print_Trie();

    delete trie1;

}