/**
 * Test helper: asserts that the document frequency the index reports for a
 * term equals the expected value.
 *
 * The raw term text is first run through a StandardAnalyzer; the test fails
 * unless this yields exactly one token. That token's text value, paired with
 * the field name, forms the Term that is looked up via the index's term reader.
 *
 * @param pIndexReader reader of the index under test
 * @param sField       name of the field the term belongs to
 * @param sTerm        raw term text; must analyze to exactly one token
 * @param expDf        expected document frequency of the term
 */
void IndexTestCase::checkDocFreq(IndexReaderPtr& pIndexReader,
                                 const tstring& sField,
                                 const tstring& sTerm,
                                 df_t expDf)
{
    TermReaderPtr pReader = pIndexReader->termReader();
    CPPUNIT_ASSERT(pReader);

    // Analyze the raw text so the lookup uses the analyzer's token text.
    StandardAnalyzer analyzer;
    analyzer.init();
    TokenViewPtr pTokenView = analyzer.tokenize(sTerm.c_str(), sTerm.length());
    CPPUNIT_ASSERT(pTokenView);
    CPPUNIT_ASSERT(pTokenView->getNumTokens() == 1);

    // Exactly one token is guaranteed by the assertion above, so next() is safe.
    TokenView::Iterator tokenIt = pTokenView->iterator();
    TermPtr pLookupTerm(new Term(sField, tokenIt.next().getTextValue()));

    TermPostingIteratorPtr pPostings = pReader->seek(pLookupTerm.get());
    CPPUNIT_ASSERT(pPostings);

    CPPUNIT_ASSERT_EQUAL(expDf, pPostings->getTermMeta().getDocFreq());
}
size_t StandardStopFilter::loadWords(const tstring& sWords) { m_stopwords.clear(); size_t nWords = 0; StandardAnalyzer analyzer; analyzer.init(); TokenViewPtr pTokens = analyzer.tokenize(sWords.c_str(), sWords.length()); if (pTokens.isNotNull()) { TokenView::Iterator it = pTokens->iterator(); while (it.hasNext()) { const Token& token = it.next(); assert(token.getHintId() != INVALID_HINTID); m_stopwords.insert((termid_t)token.getHintId()); ++nWords; } } return nWords; }