예제 #1
0
void IndexContentTestCase::testIndexContent_DL()
{
    Index* pIndex;
    IndexReaderPtr pReader;

    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int	docCount = 0;
    int	termCount = 0;
    uint32_t i;
    uint32_t indexTermId;
    string fileName;

    //Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_dlindex"));    
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);
    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();

    pTermIter = pTermReader->termIterator("BODY");

    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();
    //Iterator all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
		
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
				
        indexTermId = (pTerm->cast<int32_t>())->getValue();
        docCount = 0;
        TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings();
        while(pTermDocFreqs->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc());
            docCount++;
            // 获取文件路径
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());

            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++)
            {
                if(indexTermId == pTermIdList->getValue(i))
                {
                    termCount++;
                }
            }
			
            CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq());

        }//end while nextDoc()
        CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq());
    }
    CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms());
}
예제 #2
0
void QueryResult::init(const FieldSelector& selector, 
                       const IndexReaderPtr& pIndexReader,
                       const QueryHits& hits)
{
    StoredFieldsReaderPtr pStoredFieldsReader = pIndexReader->createStoredFieldsReader();
    if (pStoredFieldsReader.isNull())
    {
        return;
    }

    TimeProbe probe;
    probe.start();

    m_docs.reserve(hits.size());
    setTracer(hits.getTracer());

    setTotalHits(hits.getTotalHits());

    QueryHits::Iterator it = hits.iterator();
    while (it.hasNext())
    {
        const QueryHits::HitDoc& hitDoc = it.next();
        
        ResultDocPtr pResDoc(new ResultDoc(hitDoc.getDocId(), 
                        hitDoc.getScore(), selector.size()));
        addDoc(pResDoc);

        pStoredFieldsReader->getDocument(selector, *pResDoc);
    }

    probe.stop();
    FX_QUERY_TRACE(INFO, getTracer(), "fetch field time [%d] ms",
                   (int32_t)probe.elapsed() / 1000);
}
예제 #3
0
void IndexTestCase::testTextIndex()
{
    DocumentSchema schema;
    schema.addUnIndexedField("PATH");
    schema.addTextField("CONTENT");

    buildIndex(schema, "file1.txt, hello world.");

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);

    TermIteratorPtr pTermIterator = pTermReader->termIterator("CONTENT");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF());
    }

    Term term("CONTENT", "hello");
    TermPostingIteratorPtr pPost = pTermReader->seek(&term);

    CPPUNIT_ASSERT(pPost);
    docid_t docId = pPost->skipTo(0);
    CPPUNIT_ASSERT_EQUAL((docid_t)0, docId);
    docId = pPost->skipTo(++docId);
    CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);

    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();
    CPPUNIT_ASSERT(pDocReader);
    FieldSelector selector(pReader->getDocSchema(), true, false);
    ResultDoc resultDoc(0);
    bool ret = pDocReader->getDocument(selector, resultDoc);
    CPPUNIT_ASSERT(ret);
    CPPUNIT_ASSERT(resultDoc.size() > 0);
}
예제 #4
0
void IndexContentTestCase::testIndexContent_WL()
{
    Index* pIndex;
    IndexReaderPtr pReader;

    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int	docCount = 0;
    int	termCount = 0;
    int	pos = -1;
    uint32_t indexTermId;
    string fileName;

    //Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_wlindex"));    
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);

    CPPUNIT_ASSERT(pIndex != NULL);

    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();

    pTermIter = pTermReader->termIterator("BODY");

    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();
    //Iterator all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();

        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
		
        indexTermId = (pTerm->cast<int32_t>())->getValue();
        TermPositionIteratorPtr pPositions = pTermReader->termPositions();
        docCount = 0;

        while(pPositions->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pPositions->doc());
            docCount++;

            fileName.assign(pDoc->getField("PATH")->getValue().c_str());

            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            pos = pPositions->nextPosition();
            termCount = 0;

            while(pos != -1)
            {
                termCount++;
                CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos));
                pos = pPositions->nextPosition();
            }
            CPPUNIT_ASSERT(termCount == pPositions->freq());
        }//end while nextDoc()
        CPPUNIT_ASSERT(docCount == pPositions->getDocFreq());
    }
    CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(), 
                         (int64_t)pReader->getNumTerms());
}