Example #1
0
    void testOrder()
    {
        InvertedIndex index;

        const Term term1("foo");

        index.insert(term1, 0);
        index.insert(term1, 2);
        index.insert(term1, 1);
        index.insert(term1, 5);
        index.insert(term1, 4);
        index.insert(term1, 3);
        index.insert(term1, 6);

        const Postings &postings = index.getPostings(term1);

        CPPUNIT_ASSERT(postings.size() == 7);
        Postings::const_iterator iter = postings.begin();
        DocId prevId = *iter;
        ++iter;
        for(; iter != postings.end(); ++iter)
        {
            CPPUNIT_ASSERT(prevId < *iter);
            prevId = *iter;
        }
        CPPUNIT_ASSERT(prevId == 6);
    }
Example #2
0
    void testInsert()
    {
        InvertedIndex index;

        const Term term1("foo");
        index.insert(term1, 0);
        CPPUNIT_ASSERT(index.getPostings(term1).size() == 1);
        CPPUNIT_ASSERT(isContain(index.getPostings(term1), 0));

        index.insert(term1, 1);
        CPPUNIT_ASSERT(index.getPostings(term1).size() == 2);
        CPPUNIT_ASSERT(isContain(index.getPostings(term1), 0));
        CPPUNIT_ASSERT(isContain(index.getPostings(term1), 1));
        
        CPPUNIT_ASSERT(!isContain(index.getPostings(term1), 2));

        const Term term2("bar");
        index.insert(term2, 0);
        CPPUNIT_ASSERT(index.getPostings(term2).size() == 1);
        CPPUNIT_ASSERT(isContain(index.getPostings(term2), 0));

        index.insert(term2, 1);
        CPPUNIT_ASSERT(index.getPostings(term2).size() == 2);
        CPPUNIT_ASSERT(isContain(index.getPostings(term2), 0));
        CPPUNIT_ASSERT(isContain(index.getPostings(term2), 1));
        
        CPPUNIT_ASSERT(!isContain(index.getPostings(term2), 2));
    }
Example #3
0
//	Tokenizes the text range [it, endit) and inserts the recognized tokens
//	into inverted_index under document_id.
//
//	How the members are used here (from this body alone):
//	  - character_ maps an input wchar to its lowercase form (see the
//	    lowercasing loop below); a wchar absent from character_ is not a
//	    word character.
//	  - delimiter_ holds hard separators: a run of delimiters ends the
//	    current token, so no token can span a delimiter.
//	  - stopword_.max_match / vocabulary.max_match return how many leading
//	    words of `token` match; 0 means no match.
//	NOTE(review): `vocabulary` lacks the trailing underscore the other
//	members carry — presumably the same kind of member; confirm naming.
void Tokenizer::execute(wchar const * it , wchar const * endit , InvertedIndex & inverted_index , unsigned document_id)
{
	//	Sliding window of the words read so far but not yet consumed.
	Token token;

	//	Cached end() sentinels for the membership tests in the hot loop.
	auto not_character = character_.end();
	auto not_delimiter = delimiter_.end();

	while (it != endit)
	{	//	Read a sequence of 6 words
		//	Top up `token` to 6 words.  NOTE(review): assumes
		//	token.size() <= 6 here (size_t underflow otherwise) — the
		//	consumer loop below always pops at least one word, so this
		//	appears to hold; verify.
		size_t counter = 6 - token.size();
		bool separated = false;
		for (; counter > 0; --counter)
		{	//	Eliminate all delimiters
			//	Skip every non-word character until a word starts.
			while (it != endit && character_.find(*it) == not_character)
			{
				if (delimiter_.find(*it) != not_delimiter)
				{	//	If it is a delimiter, eliminate all of 'em
					do
					{
						++it;
					} while (it != endit && delimiter_.find(*it) != not_delimiter);
					//	If it is a delimiter, it must not read more words
					//	=> There is no delimiter can present in the middle of a token
					separated = true;
					break;
				}
				++it;
			}

			if (it == endit)
			{	//	End of file
				separated = true;
				break;
			}

			//	Note: the `else` below binds to the inner `if`, as the
			//	indentation intends (dangling-else).
			if (separated)
				if (!token.empty())
					//	Delimiters found => must not read more words
					break;
				else
				{	//	Delimiters found => Token is still empty => continue
					//	Re-add the iteration spent on the delimiter run so
					//	we still collect a full 6 words.
					separated = false;
					++counter;
					continue;
				}

			//	A word starts at `begin`; consume its characters.
			wchar const * begin = it;
			do
			{	//	Read characters of a word in the token
				++it;
			} while (it != endit && character_.find(*it) != not_character);

			wstring origin = wstring(begin , it - begin);
			size_t length = origin.length();
			//	Lowercase the word
			for (size_t i = 0; i < length; ++i)
				origin[i] = character_.at(origin[i]);
			token.push_back(origin);
		}

		//	Consume the buffered words.  Runs once normally; when
		//	`separated` is set (delimiter or end of input hit) it loops
		//	until `token` is fully drained, since no further words may
		//	join the current token.
		do
		{
			//	Shadows the outer `counter` intentionally.
			size_t counter;
			while (true)
			{	//	Eliminate stop words
				counter = stopword_.max_match(token);
				if (counter == 0)
					break;
				else
				{
					//	Drop the matched stop-word prefix.
					do
					{
						token.pop_front();
						--counter;
					} while (counter > 0);
					if (token.empty())
						break;
				}
			}
			if (token.empty())
				break;
			counter = vocabulary.max_match(token);
			if (counter > 0)
			{	//	If there is a token in vocabulary, insert it
				inverted_index.insert(token.cbegin() , token.cbegin() + counter , document_id);
				//token = Token(token.cbegin() + counter , token.cend());
				//	Remove the inserted words from the window.
				do
				{
					token.pop_front();
					--counter;
				} while (counter > 0);
			}
			else
			{	//	Otherwise, only insert the first word
				inverted_index.insert(token.cbegin() , token.cbegin() + 1 , document_id);
				//token = Token(token.cbegin() + 1 , token.cend());
				token.pop_front();
			}
		} while (separated);
	}
}