コード例 #1
0
ファイル: IndexTestCase.cpp プロジェクト: hxfxjun/firtex2
void IndexTestCase::checkDocFreq(IndexReaderPtr& pIndexReader,
                                 const tstring& sField, 
                                 const tstring& sTerm,
                                 df_t expDf)
{
    TermReaderPtr pTermReader = pIndexReader->termReader();
    CPPUNIT_ASSERT(pTermReader);    

    StandardAnalyzer sa;
    sa.init();

    TokenViewPtr pTokens = sa.tokenize(sTerm.c_str(), sTerm.length());
    CPPUNIT_ASSERT(pTokens);
    CPPUNIT_ASSERT(pTokens->getNumTokens() == 1);
    TokenView::Iterator it = pTokens->iterator();
    TermPtr pTerm(new Term(sField, it.next().getTextValue()));

    TermPostingIteratorPtr pPost = pTermReader->seek(pTerm.get());
    CPPUNIT_ASSERT(pPost);    
    const TermMeta& termMeta = pPost->getTermMeta();
    CPPUNIT_ASSERT_EQUAL(expDf, termMeta.getDocFreq());
}
コード例 #2
0
size_t StandardStopFilter::loadWords(const tstring& sWords)
{
    m_stopwords.clear();

    size_t nWords = 0;
    StandardAnalyzer analyzer;
    analyzer.init();
    TokenViewPtr pTokens = analyzer.tokenize(sWords.c_str(), sWords.length());
    if (pTokens.isNotNull())
    {
        TokenView::Iterator it = pTokens->iterator();
        while (it.hasNext())
        {
            const Token& token = it.next();
            assert(token.getHintId() != INVALID_HINTID);
            m_stopwords.insert((termid_t)token.getHintId());

            ++nWords;
        }
    }
    return nWords;
}