Example #1
0
void IndexTestCase::testRefreshIndexWithMultiBarrel()
{
    DocumentSchema schema;
    schema.addTextField("BODY");
    
    stringstream ss;
    const size_t NUM_DOCS = 1000;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << "body" << i << " hot;";
    }
    buildIndex(schema, ss.str());
    buildIndex(schema, ss.str(), true);

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pIndexReader1 = index.acquireReader();
    checkDocFreq(pIndexReader1, "BODY", "hot", 2 * NUM_DOCS);

    buildIndex(schema, ss.str(), true);
    IndexReaderPtr pIndexReader2 = index.acquireReader(true);
    CPPUNIT_ASSERT(pIndexReader1 != pIndexReader2);
    
    checkDocFreq(pIndexReader2, "BODY", "hot", 3 * NUM_DOCS);
}
Example #2
0
void IndexTestCase::testKeywordIndex()
{
    DocumentSchema schema;
    schema.addUnIndexedField("PATH");
    schema.addField("Keyword", "KEYWORD", false);

    buildIndex(schema, "file1.txt, hello world.");

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);
    TermIteratorPtr pTermIterator = pTermReader->termIterator("Keyword");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF());
    }

    Term term("Keyword", "hello world.");
    TermPostingIteratorPtr pPost = pTermReader->seek(&term);
    CPPUNIT_ASSERT(pPost);
    docid_t docId = pPost->skipTo(0);
    CPPUNIT_ASSERT_EQUAL((docid_t)0, docId);
    docId = pPost->skipTo(++docId);
    CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
}
void DateTimeIndexTestCase::testCTF()
{
    const size_t NUM_DOCS = 100;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << 2009 << "-" << i % 12 + 1 << "-" << i % 27 + 1 << " " 
           << i % 24 << ":" << i % 60 << ":" << i % 60 << ";";
    }

    buildDateTimeIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);
    TermIteratorPtr pTermIterator = pTermReader->termIterator("DateTime1");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    df_t ttf = 0;
    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF());
        ttf +=  termMeta.getDocFreq();
    }

    CPPUNIT_ASSERT_EQUAL((df_t)100, ttf);
}
void DateTimeIndexTestCase::testQuery()
{
    const size_t NUM_DOCS = 12;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << 2009 << "-" << i % 12 + 1 << "-" << i % 27 + 1 << " " 
           << i % 24 << ":" << i % 60 << ":" << i % 60 << ";";
        if (i % 10 == 0)
        {
            ss << 2009 << "-" << i % 12 + 1 << "-" << i % 27 + 1 << " " 
               << i % 24 << ":" << i % 60 << ":" << i % 60 << ";";
        }
    }

    buildDateTimeIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    IndexSearcher se(pReader);

    QueryParser queryParser(pReader->getAnalyzerMapper(), 
                    "DateTime1", QueryParser::OP_AND);
    QueryHitsPtr pHits = se.search("query=\'2009-2-2 1:1:1\'", queryParser);
    CPPUNIT_ASSERT(pHits);
    uint64_t uTotalHits =  pHits->getTotalHits();
    CPPUNIT_ASSERT_EQUAL((uint64_t)1, uTotalHits);

    pHits = se.search("query=\'2009-11-11 10:10:10\'", queryParser);
    CPPUNIT_ASSERT(pHits);
    uTotalHits =  pHits->getTotalHits();
    CPPUNIT_ASSERT_EQUAL((uint64_t)2, uTotalHits);
}
// Cross-checks the "BODY" postings of the prebuilt "test_dlindex" index
// against the per-file term lists held by m_pDocScanner. For every term, the
// frequency reported for each posting must equal the number of occurrences of
// the term id in that file's term list, and the number of postings visited
// must equal the reported document frequency. Finally the scanner's total
// term count must match the reader's.
void IndexContentTestCase::testIndexContent_DL()
{
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_dlindex"));

    // The Index owner is declared before everything acquired from it, so the
    // reader and iterators below are destroyed before the Index is deleted.
    Index* pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);

    IndexReaderPtr pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    TermIteratorPtr pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    string fileName;

    // Iterate over all terms of the field.
    while (pTermIter->next())
    {
        const Term* pTerm = pTermIter->term();

        CPPUNIT_ASSERT(pTermReader->seek(pTerm));

        const uint32_t indexTermId = (pTerm->cast<int32_t>())->getValue();
        int docCount = 0;
        TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings();
        while (pTermDocFreqs->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc());
            docCount++;

            // Get the file path stored with the document.
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());

            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Count how often the current term id occurs in the file's term list.
            int termCount = 0;
            for (uint32_t i = 0; i < pTermIdList->getSize(); i++)
            {
                if (indexTermId == pTermIdList->getValue(i))
                {
                    termCount++;
                }
            }

            CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq());
    }
    CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms());
}
Example #6
0
void IndexTestCase::testInt32ForwardIndex()
{
    DocumentSchema schema;
    schema.addSortableField("Int32Id", FieldType::INT32, false);

    const static size_t NUM_DOCS = 1000;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << i << ";";
    }

    GLOBAL_CONF().Build.buildThreadCount = 1;
    buildIndex(schema, ss.str());

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    ForwardIndexIteratorPtr pForIndexIt = pReader->forwardIndexReader("Int32Id");
    CPPUNIT_ASSERT(pForIndexIt != NULL);
    Int32ForwardIndexIteratorPtr pInt32ForIndexIt =
        pForIndexIt.cast<Int32ForwardIndexIterator>();
    CPPUNIT_ASSERT(pInt32ForIndexIt != NULL);
    
    int32_t value = 0;
    docid_t docId = 0;
    for (; docId < (docid_t)NUM_DOCS; ++docId)
    {
        CPPUNIT_ASSERT(pInt32ForIndexIt->seek(docId, value));
        CPPUNIT_ASSERT_EQUAL((int32_t)docId, value);
    }
    CPPUNIT_ASSERT(!pInt32ForIndexIt->seek(docId, value));
}
void KeywordIndexTestCase::testCTF()
{
    GLOBAL_CONF().Build.buildThreadCount = 2;
    buildKeywordIndex("1, 2; 3, 4; 5, 6; 7, 8; 9, 10; "
                      "1, 2; 3, 4; 5, 6; 7, 8; 9, 10");

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);
    TermIteratorPtr pTermIterator = pTermReader->termIterator("Keyword1");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    try 
    {
        while (pTermIterator->hasNext())
        {
            TermIterator::TermEntry entry = pTermIterator->next();
            const TermMeta& termMeta = entry.postingIterator->getTermMeta();
            CPPUNIT_ASSERT_EQUAL((df_t)2, termMeta.getDocFreq());
            CPPUNIT_ASSERT_EQUAL((ctf_t)2, termMeta.getCTF());
        }
    }
    catch(const FirteXException& e)
    {
        cout << "ERROR:" << e.what() << endl;
        CPPUNIT_ASSERT(false);
    } 
}
Example #8
0
void IndexTestCase::testInt32_IF()
{
    DocumentSchema schema;
    schema.addField("Int32", "INT32_IF", false);

    const static size_t NUM_DOCS = 1000;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << (i % 100) << ";";
    }

    GLOBAL_CONF().Build.buildThreadCount = 1;
    buildIndex(schema, ss.str());

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);
    TermIteratorPtr pTermIterator = pTermReader->termIterator("Int32");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)10, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)10, termMeta.getCTF());
    }

    Term term("Int32", "0");
    TermPostingIteratorPtr pPost = pTermReader->seek(&term);
    CPPUNIT_ASSERT(pPost);
    docid_t docId = pPost->skipTo(0);
    CPPUNIT_ASSERT_EQUAL((docid_t)0, docId);
    docId = pPost->skipTo(901);
    CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);

    ForwardIndexIteratorPtr pForIndexIt = pReader->forwardIndexReader("Int32");
    CPPUNIT_ASSERT(pForIndexIt != NULL);
    Int32ForwardIndexIteratorPtr pInt32ForIndexIt =
        pForIndexIt.cast<Int32ForwardIndexIterator>();
    CPPUNIT_ASSERT(pInt32ForIndexIt != NULL);
    
    int32_t value = 0;
    docId = 0;
    for (; docId < (docid_t)NUM_DOCS; ++docId)
    {
        CPPUNIT_ASSERT(pInt32ForIndexIt->seek(docId, value));
        CPPUNIT_ASSERT_EQUAL((int32_t)(docId % 100), value);
    }
    CPPUNIT_ASSERT(!pInt32ForIndexIt->seek(docId, value));
}
Example #9
0
void IndexTestCase::testIndexOptimize()
{
    GLOBAL_CONF().Merge.maxAllowedOpenFiles = 16;
    DocumentSchema schema;
    schema.addTextField("BODY");
    
    stringstream ss;
    const size_t NUM_DOCS = 1000;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << "body" << i << " hot;";
    }

    try
    {
        buildIndex(schema, ss.str());
        buildIndex(schema, ss.str(), true);
    }
    catch (const FirteXException& e)
    {
        cout << "ERROR: " << e.what() << endl;
        CPPUNIT_ASSERT(false);
    }

    tstring str = getTestPath();

    {
        Index index;
        index.open(str, Index::APPEND, NULL); 
        IndexWriterPtr pIndexWriter = index.acquireWriter();
        CPPUNIT_ASSERT(pIndexWriter != NULL);
    
        try
        {
            pIndexWriter->optimizeIndex();
        }
        catch (const FirteXException& e)
        {
            cout << "ERROR: " << e.what() << endl;
            CPPUNIT_ASSERT(false);
        }
    }
    Index index;
    try
    {
        index.open(str, Index::READ, NULL);
        IndexReaderPtr pIndexReader = index.acquireReader();
        checkDocFreq(pIndexReader, "BODY", "hot", 2 * NUM_DOCS);
    }
    catch (const FirteXException& e)
    {
        cout << "ERROR: " << e.what() << endl;
        CPPUNIT_ASSERT(false);
    }
}
Example #10
0
void DateTimeIndexTestCase::testPosting()
{
    const size_t NUM_DOCS = 100;
    vector<TimeAndDoc> timeVec;

    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        int y = 2011;
        int m = (i % 12) + 1;
        int d = (i % 27) + 1;
        int h = i % 24;
        int min = i % 60;
        int s = i % 60;
        ss << y << "-" << m << "-" << d << " " 
           << h << ":" << min << ":" << s << ";";
        TimeAndDoc td;
        td.nTime = DateTimeAnalyzer::makeTime(y, m, d, h, min, s);
        td.docId = (docid_t)i;
        timeVec .push_back(td);
    }
    sort(timeVec.begin(), timeVec.end(), timeLess);

    GLOBAL_CONF().Build.buildThreadCount = 1;
    buildDateTimeIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    TermIteratorPtr pIterator = pTermReader->termIterator("DateTime1");
    CPPUNIT_ASSERT(pIterator);

    size_t j = 0;
    while (pIterator->hasNext())
    {
        TermIterator::TermEntry entry = pIterator->next();
        const Term* pTerm = entry.term;
        const Int64Term* pTermX = pTerm->cast<int64_t>();
        int64_t nCurTerm = pTermX->getValue();
        int64_t nExpTerm = timeVec[j].nTime;
        CPPUNIT_ASSERT_EQUAL(nExpTerm, nCurTerm);

        TermPostingIteratorPtr pPostingIter = entry.postingIterator;
        CPPUNIT_ASSERT(pPostingIter != NULL);
        docid_t docId = pPostingIter->skipTo(0);
        CPPUNIT_ASSERT_EQUAL(timeVec[j].docId, docId);
        docId = pPostingIter->skipTo(docId + 1);
        CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
        
        j++;
    }
}
Example #11
0
void IndexTestCase::testRefreshIndex()
{
    DocumentSchema schema;
    schema.addTextField("BODY");
    
    stringstream ss;
    const size_t NUM_DOCS = 1000;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << "body" << i << " hot;";
    }
    buildIndex(schema, ss.str());

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);

    IndexReaderPtr pIndexReader1 = index.acquireReader();
    checkDocFreq(pIndexReader1, "BODY", "hot", NUM_DOCS);

    IndexBarrelKeeperPtr pKeeper = index.getIndexBarrelKeeper();
    CPPUNIT_ASSERT_EQUAL((size_t)1, pKeeper->getHeldCommitCount());

    buildIndex(schema, ss.str(), true);
    IndexReaderPtr pIndexReader2 = index.acquireReader(true);
    CPPUNIT_ASSERT(pIndexReader1 != pIndexReader2);

    checkDocFreq(pIndexReader2, "BODY", "hot", 2 * NUM_DOCS);

    CPPUNIT_ASSERT_EQUAL((size_t)2, pKeeper->getHeldCommitCount());

    pIndexReader1.reset();
    pIndexReader2.reset();
    buildIndex(schema, ss.str(), true);

    IndexReaderPtr pIndexReader3 = index.acquireReader(true);
    CPPUNIT_ASSERT_EQUAL((size_t)1, pKeeper->getHeldCommitCount());
}
Example #12
0
void IndexTestCase::testPrimaryKeyIndex()
{
    DocumentSchema schema;
    schema.addField("PK", "PRIMARY_KEY", false);

    const static size_t NUM_DOCS = 1000;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << i << ";";
    }

    GLOBAL_CONF().Build.buildThreadCount = 1;

    buildIndex(schema, ss.str());

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);
    TermIteratorPtr pTermIterator = pTermReader->termIterator("PK");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF());
    }

    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        stringstream ss2;
        ss2 << i;
        Term  term("PK", ss2.str());
        TermPostingIteratorPtr pPost = pTermReader->seek(&term);
        CPPUNIT_ASSERT(pPost);
        docid_t docId = pPost->skipTo(0);
        CPPUNIT_ASSERT_EQUAL((docid_t)i, docId);
        docId = pPost->skipTo(++docId);
        CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
    }
}
void StandardCollectionTestCase::checkIndex()
{
    string sIndex = TestHelper::getTestDataPath() + "/standard_collection/files_index";
    Index index;
    try
    {
        index.open(sIndex, Index::READ, NULL);

        IndexReaderPtr pReader = index.acquireReader();
        CPPUNIT_ASSERT(pReader != NULL);        
    }
    catch(const FirteXException& e)
    {
        cout << "===ERROR: " << e.what() << endl;
        CPPUNIT_ASSERT(false);
    }
}
Example #14
0
void DateTimeIndexTestCase::testRangeQuery()
{
    const size_t NUM_DOCS = 12;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << 2009 << "-" << i % 12 + 1 << "-" << i % 27 + 1 << " " 
           << i % 24 << "/" << i % 60 << "/" << i % 60 << ";";
        /*  if (i % 10 == 0)
            {
            ss << 2009 << "-" << i % 12 + 1 << "-" << i % 27 + 1 << " " 
            << i % 24 << "/" << i % 60 << "/" << i % 60 << ";";
            }*/
    }

    buildDateTimeIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();

    // test case for [13 TO 18]
    {   
        IndexSearcher se(pReader);
        // QueryHits* pHits = se.search("[2009-11-11 10/10/10 TO 2009-12-12 11/11/11]",
        //         "DateTime1", 100);
        // CPPUNIT_ASSERT(pHits != NULL);
        // df_t numHits =  pHits->getHitNum();
        // CPPUNIT_ASSERT_EQUAL((df_t)2, numHits);

        // QueryHits::Iterator it = pHits->iterator();
    
        // df_t n = 0;
        // while (n < numHits)
        // {
        //     CPPUNIT_ASSERT(it.hasNext());
        //     QueryHits::HitDoc* hitDoc = it.next();
        //     CPPUNIT_ASSERT(hitDoc != NULL);
        //     CPPUNIT_ASSERT_EQUAL((docid_t)10 + n, hitDoc->getDocID());
        //     n++;
        // }

        // delete pHits;    
    }
}
Example #15
0
void IndexTestCase::testTextIndex()
{
    DocumentSchema schema;
    schema.addUnIndexedField("PATH");
    schema.addTextField("CONTENT");

    buildIndex(schema, "file1.txt, hello world.");

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    CPPUNIT_ASSERT(pReader != NULL);
    TermReaderPtr pTermReader = pReader->termReader();
    CPPUNIT_ASSERT(pTermReader);

    TermIteratorPtr pTermIterator = pTermReader->termIterator("CONTENT");
    CPPUNIT_ASSERT(pTermIterator != NULL);

    while (pTermIterator->hasNext())
    {
        TermIterator::TermEntry entry = pTermIterator->next();
        const TermMeta& termMeta = entry.postingIterator->getTermMeta();
        CPPUNIT_ASSERT_EQUAL((df_t)1, termMeta.getDocFreq());
        CPPUNIT_ASSERT_EQUAL((ctf_t)1, termMeta.getCTF());
    }

    Term term("CONTENT", "hello");
    TermPostingIteratorPtr pPost = pTermReader->seek(&term);

    CPPUNIT_ASSERT(pPost);
    docid_t docId = pPost->skipTo(0);
    CPPUNIT_ASSERT_EQUAL((docid_t)0, docId);
    docId = pPost->skipTo(++docId);
    CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);

    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();
    CPPUNIT_ASSERT(pDocReader);
    FieldSelector selector(pReader->getDocSchema(), true, false);
    ResultDoc resultDoc(0);
    bool ret = pDocReader->getDocument(selector, resultDoc);
    CPPUNIT_ASSERT(ret);
    CPPUNIT_ASSERT(resultDoc.size() > 0);
}
Example #16
0
void ForwardIndexTestCase::testWriteAndReadWithMultiBarrels()
{
    const size_t NUM_BARRELS = 3;
    const size_t NUM_DOCS = 300;
    vector<int32_t> v1;
    vector<int64_t> v2;
    vector<float> v3;

    float fVal = 0.5;
    size_t idx = 0;
    for (size_t i = 0; i < NUM_BARRELS; ++i)
    {
        stringstream ss;
        for (size_t j = 0; j < NUM_DOCS; ++j)
        {
            ss << idx << "," << idx * 9381 << "," << fVal * idx << ";";
            v1.push_back(idx);
            v2.push_back(idx * 9381);
            v3.push_back(fVal * idx);
            ++idx;
        }
        buildForwardIndex(ss.str());
    }

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    
    ForwardIndexIteratorPtr fdIter = pReader->forwardIndexReader("Number1");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<int32_t>(fdIter, v1);

    fdIter = pReader->forwardIndexReader("Number2");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<int64_t>(fdIter, v2);

    fdIter = pReader->forwardIndexReader("Price");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<float>(fdIter, v3);
}
Example #17
0
// Builds an index in two buildIndex() passes of NUM_DOCS documents each,
// using eight build threads, and checks that the term "hot" is reported for
// all 2 * NUM_DOCS documents.
void IndexTestCase::testIncrementalIndex()
{
    DocumentSchema schema;
    schema.addTextField("BODY");
    
    stringstream ss;
    const size_t NUM_DOCS = 1000;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        // Every document is "body<i> hot;".
        ss << "body" << i << " hot;";
    }
    GLOBAL_CONF().Build.buildThreadCount = 8;
    buildIndex(schema, ss.str());
    buildIndex(schema, ss.str(), true);

    tstring str = getTestPath();
    Index index;
    index.open(str, Index::READ, NULL);
    IndexReaderPtr pIndexReader = index.acquireReader();

    // Expressed through NUM_DOCS (as the sibling tests do) instead of a
    // literal 2000, so the expectation follows the input size.
    checkDocFreq(pIndexReader, "BODY", "hot", 2 * NUM_DOCS);
}
Example #18
0
void ForwardIndexTestCase::testWriteAndRead()
{
    const size_t NUM_DOCS = 100;
    vector<int32_t> v1;
    vector<int64_t> v2;
    vector<float> v3;

    float fVal = 0.5;
    stringstream ss;
    for (size_t i = 0; i < NUM_DOCS; ++i)
    {
        ss << i << "," << i * 9381 << "," << fVal * i << ";";
        v1.push_back(i);
        v2.push_back(i * 9381);
        v3.push_back(fVal * i);
    }

    GLOBAL_CONF().Build.buildThreadCount = 1;

    buildForwardIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    
    ForwardIndexIteratorPtr fdIter = pReader->forwardIndexReader("Number1");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<int32_t>(fdIter, v1);

    fdIter = pReader->forwardIndexReader("Number2");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<int64_t>(fdIter, v2);

    fdIter = pReader->forwardIndexReader("Price");
    CPPUNIT_ASSERT(fdIter != NULL);
    checkForwardIndex<float>(fdIter, v3);
}
Example #19
0
void IndexTestCase::testDocumentDeletion()
{
    DocumentSchema schema;
    schema.addField("URL", "PRIMARY_KEY", true);
    schema.addTextField("BODY");
    schema.addField("MODIFIED", "INT64", true);
    
    stringstream ss1;
    const size_t NUM_DOCS = 1000;
    size_t i = 0;
    for (; i < NUM_DOCS; ++i)
    {
        ss1 << "url" << i << ", body" << i << " hot," 
            << (i * 100) % 1000 << ";";
    }
    buildIndex(schema, ss1.str());

    stringstream ss2;
    for (; i < 2 * NUM_DOCS; ++i)
    {
        ss2 << "url" << i << ", body" << i << " hot," 
            << (i * 100) % 1000 << ";";
    }

    buildIndex(schema, ss2.str(), true);

    StandardAnalyzerPtr sa(new StandardAnalyzer());
    sa->init();

    TokenViewPtr pTokens = sa->tokenize("hot", 3);
    CPPUNIT_ASSERT(pTokens);
    CPPUNIT_ASSERT(pTokens->getNumTokens() == 1);
    TokenView::Iterator it = pTokens->iterator();
    TermPtr pTerm(new Term("BODY", it.next().getTextValue()));
    
    tstring str = getTestPath();
    
    std::set<docid_t> answer;

    {
        Index index;
        index.open(str, Index::RDWR, NULL); 
        IndexWriterPtr pIndexWriter = index.acquireWriter();
        CPPUNIT_ASSERT(pIndexWriter != NULL);

        IndexReaderPtr pIndexReader = index.acquireReader();
        CPPUNIT_ASSERT(pIndexReader != NULL);

        for (size_t i = 0; i < 2 * NUM_DOCS; ++i)
        {
            stringstream ss;
            ss << "url" << i;
            if (i == 1000 || i == 1500 || i == 1505 || i == 1999)
            {
                pIndexWriter->deleteDocument(ss.str());
            }
            else
            {
                TermReaderPtr pTermReader = pIndexReader->termReader();
                TermPtr pTerm(new Term("URL", ss.str()));
                TermPostingIteratorPtr pIt = pTermReader->seek(pTerm.get());
                docid_t docId = pIt->skipTo(0);
                answer.insert(docId);
            }
        }

        TermReaderPtr pTermReader = pIndexReader->termReader();
        TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get());
        CPPUNIT_ASSERT(pDocFreqs);

        CPPUNIT_ASSERT_EQUAL((df_t)NUM_DOCS * 2, pDocFreqs->getTermMeta().getDocFreq());

        std::set<docid_t>::const_iterator it = answer.begin();
        for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); )
        {        
            docid_t docId = pDocFreqs->skipTo((docid_t)i);
            i = docId + 1;
            if (docId == INVALID_DOCID)
            {
                break;
            }
            CPPUNIT_ASSERT_EQUAL(*it, docId);
            ++it;
        }
        CPPUNIT_ASSERT(it == answer.end());
    }

    {
        Index index;
        index.open(str, Index::READ, NULL); 
        IndexReaderPtr pIndexReader = index.acquireReader();
        CPPUNIT_ASSERT(pIndexReader != NULL);

        TermReaderPtr pTermReader = pIndexReader->termReader();
        TermPostingIteratorPtr pDocFreqs = pTermReader->seek(pTerm.get());
        CPPUNIT_ASSERT(pDocFreqs);

        CPPUNIT_ASSERT_EQUAL((df_t)(2 * NUM_DOCS), pDocFreqs->getTermMeta().getDocFreq());
        std::set<docid_t>::const_iterator it = answer.begin();
        for (docid_t i = 0; i < (docid_t)(2 * NUM_DOCS); )
        {        
            docid_t docId = pDocFreqs->skipTo((docid_t)i);
            i = docId + 1;
            if (docId == INVALID_DOCID)
            {
                break;
            }
            CPPUNIT_ASSERT_EQUAL(*it, docId);
            ++it;
        }
        CPPUNIT_ASSERT(it == answer.end());

        // for (std::set<docid_t>::const_iterator it = answer.begin();
        //      it != answer.end(); ++it)
        // {
        //     docid_t docId = pDocFreqs->skipTo(*it);
        //     CPPUNIT_ASSERT_EQUAL(*it, docId);
        // }

        // docid_t docId = pDocFreqs->skipTo(NUM_DOCS + 0);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 1, docId);
        // docId = pDocFreqs->skipTo(NUM_DOCS + 500);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 501, docId);
        // docId = pDocFreqs->skipTo(NUM_DOCS + 505);
        // CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS + 506, docId);
        // docId = pDocFreqs->skipTo(2 * NUM_DOCS - 1);
        // CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
    }
}
// Cross-checks the "BODY" positional postings of the prebuilt "test_wlindex"
// index against the per-file term lists held by m_pDocScanner. For every
// term, each reported position must hold that term id in the file's term
// list, the number of positions must equal the reported frequency, and the
// number of postings visited must equal the reported document frequency.
// Finally the scanner's total term count must match the reader's.
void IndexContentTestCase::testIndexContent_WL()
{
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_wlindex"));

    // The Index owner is declared before everything acquired from it, so the
    // reader and iterators below are destroyed before the Index is deleted.
    // (No null check: a failing `new` throws rather than returning NULL.)
    Index* pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);

    IndexReaderPtr pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    TermIteratorPtr pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    string fileName;

    // Iterate over all terms of the field.
    while (pTermIter->next())
    {
        const Term* pTerm = pTermIter->term();

        CPPUNIT_ASSERT(pTermReader->seek(pTerm));

        const uint32_t indexTermId = (pTerm->cast<int32_t>())->getValue();
        TermPositionIteratorPtr pPositions = pTermReader->termPositions();
        int docCount = 0;

        while (pPositions->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pPositions->doc());
            docCount++;

            // Get the file path stored with the document.
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());

            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // nextPosition() is read until it yields -1; every position
            // returned must hold the current term id in the scanned file.
            int termCount = 0;
            int pos = pPositions->nextPosition();
            while (pos != -1)
            {
                termCount++;
                CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos));
                pos = pPositions->nextPosition();
            }
            CPPUNIT_ASSERT(termCount == pPositions->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT(docCount == pPositions->getDocFreq());
    }
    CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(), 
                         (int64_t)pReader->getNumTerms());
}
// Indexes NUM_DOCS documents of the form "abc<i>, abc;" and checks the
// postings of both keyword fields:
//  - "Keyword1": the terms must be the 64-bit hashes of "abc<i>" in the order
//    defined by hashLess, each hitting exactly the document that produced it,
//    and exactly NUM_DOCS terms must be visited;
//  - "Keyword2": there must be exactly one term, the hash of "abc", whose
//    posting list enumerates documents 0 .. NUM_DOCS-1 in order.
void KeywordIndexTestCase::testPosting()
{
    const size_t NUM_DOCS = 100;

    vector<HashAndDoc> hashVec;
    stringstream ss;
    size_t i;
    for (i = 0; i < NUM_DOCS; ++i)
    {
        stringstream ss1;
        ss1 << "abc" << i;
        ss << ss1.str() << ", " << "abc" << ";";

        // Remember the expected term hash together with its document id.
        HashAndDoc hd;
        hd.nHash = Hash::hashString64(ss1.str().c_str());
        hd.docId = (docid_t)i;
        hashVec.push_back(hd);
    }
    GLOBAL_CONF().Build.buildThreadCount = 1;
    buildKeywordIndex(ss.str());

    Index index;
    index.open(getIndexPath(), Index::READ, NULL);
    IndexReaderPtr pReader = index.acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    TermIteratorPtr pIterator = pTermReader->termIterator("Keyword1");
    CPPUNIT_ASSERT(!pIterator.isNull());

    sort(hashVec.begin(), hashVec.end(), hashLess);
    size_t j = 0;
    while (pIterator->hasNext())
    {
        TermIterator::TermEntry entry = pIterator->next();

        // Fail cleanly instead of reading past the end of hashVec when the
        // index holds more terms than were generated.
        CPPUNIT_ASSERT(j < hashVec.size());

        const Term* pTerm = entry.term;
        const UInt64Term* pTermX = pTerm->cast<uint64_t>();
        uint64_t nCurTerm = pTermX->getValue();
        uint64_t nExpTerm = hashVec[j].nHash;
        CPPUNIT_ASSERT_EQUAL(nExpTerm, nCurTerm);

        // Each term must hit exactly one document: the one that produced it.
        TermPostingIteratorPtr pPostingIter = entry.postingIterator;
        CPPUNIT_ASSERT(pPostingIter != NULL);
        docid_t docId = pPostingIter->skipTo(0);
        CPPUNIT_ASSERT_EQUAL(hashVec[j].docId, docId);
        docId = pPostingIter->skipTo(docId + 1);
        CPPUNIT_ASSERT_EQUAL((docid_t)INVALID_DOCID, docId);
        j++;
    } 
    // Every generated keyword must have been seen as a term.
    CPPUNIT_ASSERT_EQUAL(NUM_DOCS, j);

    // Test the shared "abc" term of the second keyword field.
    TermIteratorPtr pIterator2 = pTermReader->termIterator("Keyword2");
    CPPUNIT_ASSERT(!pIterator2.isNull());
    i = 0; // reused as the number of terms seen in "Keyword2"
    while (pIterator2->hasNext())
    {
        TermIterator::TermEntry entry = pIterator2->next();

        const Term* pTerm = entry.term;
        const UInt64Term* pTermX = pTerm->cast<uint64_t>();
        uint64_t nCurTerm = pTermX->getValue();
        uint64_t nExpTerm = Hash::hashString64("abc");
        CPPUNIT_ASSERT_EQUAL(nExpTerm, nCurTerm);

        TermPostingIteratorPtr pPostingIter = entry.postingIterator;
        CPPUNIT_ASSERT(pPostingIter != NULL);
        docid_t docId = 0;
        docid_t docIdRet = 0;
        while ((docIdRet = pPostingIter->skipTo(docId)) != INVALID_DOCID)
        {
            // Increment outside the assertion macro so the check does not
            // rely on the macro evaluating its arguments exactly once.
            CPPUNIT_ASSERT_EQUAL(docId, docIdRet);
            ++docId;
        }
        CPPUNIT_ASSERT_EQUAL((docid_t)NUM_DOCS, docId);
        i++;
    } 
    CPPUNIT_ASSERT_EQUAL((size_t)1, i);
}