FieldCacheSanityCheckerTestFixture()
    {
        RAMDirectoryPtr dirA = newLucene<RAMDirectory>();
        RAMDirectoryPtr dirB = newLucene<RAMDirectory>();

        IndexWriterPtr wA = newLucene<IndexWriter>(dirA, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        IndexWriterPtr wB = newLucene<IndexWriter>(dirB, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        int64_t theLong = LLONG_MAX;
        double theDouble = DBL_MAX;
        uint8_t theByte = UCHAR_MAX;
        int32_t theInt = INT_MAX;
        for (int32_t i = 0; i < NUM_DOCS; ++i)
        {
            DocumentPtr doc = newLucene<Document>();
            doc->add(newLucene<Field>(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            if (i % 3 == 0)
                wA->addDocument(doc);
            else
                wB->addDocument(doc);
        }
        wA->close();
        wB->close();
        readerA = IndexReader::open(dirA, true);
        readerB = IndexReader::open(dirB, true);
        readerX = newLucene<MultiReader>(newCollection<IndexReaderPtr>(readerA, readerB));
    }
/// Indexes terms with known document frequencies and checks them through verifyDocFreq(),
/// once on the index as written and once after optimize() has been run on it.
TEST_F(SegmentTermEnumTest, testTermEnum) {
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // Each pass adds one document with the term "aaa" and one with the terms "aaa bbb".
    // After 100 passes 'aaa' has a document frequency of 200 and 'bbb' of 100.
    for (int32_t pass = 0; pass < 100; ++pass) {
        addDoc(indexWriter, L"aaa");
        addDoc(indexWriter, L"aaa bbb");
    }
    indexWriter->close();

    // first check: index not yet optimized
    verifyDocFreq(dir);

    // reopen the existing index (create == false) and optimize it
    indexWriter = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    indexWriter->optimize();
    indexWriter->close();

    // second check: same frequencies expected on the optimized index
    verifyDocFreq(dir);
}
/// Fills the two directories used by the tests: `dir` receives 1000 documents in one segment,
/// `aux` receives 30 documents in 3 segments written without compound files.
static void setUpDirs(DirectoryPtr dir, DirectoryPtr aux)
{
    // Main index: 1000 documents, asserted to end up in a single segment.
    IndexWriterPtr writer = newWriter(dir, true);
    writer->setMaxBufferedDocs(1000);
    addDocs(writer, 1000);
    BOOST_CHECK_EQUAL(1000, writer->maxDoc());
    BOOST_CHECK_EQUAL(1, writer->getSegmentCount());
    writer->close();

    // Auxiliary index: created fresh, compound file format disabled.
    writer = newWriter(aux, true);
    writer->setUseCompoundFile(false);
    writer->setMaxBufferedDocs(100);
    writer->setMergeFactor(10);

    // Three rounds of 10 documents each. The writer is closed and reopened (create == false)
    // after every round, with the same settings re-applied to the new writer.
    for (int32_t round = 0; round < 3; ++round)
    {
        addDocs(writer, 10);
        writer->close();

        writer = newWriter(aux, false);
        writer->setUseCompoundFile(false);
        writer->setMaxBufferedDocs(100);
        writer->setMergeFactor(10);
    }

    BOOST_CHECK_EQUAL(30, writer->maxDoc());
    BOOST_CHECK_EQUAL(3, writer->getSegmentCount());
    writer->close();
}
Ejemplo n.º 4
0
/// Tests whether the DocumentWriter and SegmentMerger correctly enable the payload bit in the FieldInfo
TEST_F(PayloadsTest, testPayloadFieldBit) {
    DirectoryPtr directory = newLucene<RAMDirectory>();
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    DocumentPtr doc = newLucene<Document>();
    // f1: never gets payloads
    doc->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    // f2: gets payloads in every document, but not at every term position. It checks that the
    // DocumentWriter enables the payloads bit even if only some term positions have payloads.
    doc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    // f3: gets payloads only in some documents. It checks that the SegmentMerger enables
    // payloads for such a field.
    doc->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // 8 payload bytes spelling "somedata"
    const uint8_t payloadBytes[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' };
    ByteArray someData(ByteArray::newInstance(8));
    std::memcpy(someData.get(), payloadBytes, sizeof(payloadBytes));

    // first document: payload data for field f2 only
    analyzer->setPayloadData(L"f2", 1, someData, 0, 1);

    writer->addDocument(doc);
    // closing the writer flushes the document
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
    FieldInfosPtr infos = reader->fieldInfos();
    EXPECT_FALSE(infos->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f2")->storePayloads);
    EXPECT_FALSE(infos->fieldInfo(L"f3")->storePayloads);
    reader->close();

    // Second round: index another document which has payloads for field f3 as well, then verify
    // that payloads are enabled for that field after the merge.
    writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // payload data for both f2 and f3 this time
    analyzer->setPayloadData(L"f2", someData, 0, 1);
    analyzer->setPayloadData(L"f3", someData, 0, 3);
    writer->addDocument(doc);

    // force a merge, then flush by closing
    writer->optimize();
    writer->close();

    reader = SegmentReader::getOnlySegmentReader(directory);
    infos = reader->fieldInfos();
    EXPECT_FALSE(infos->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f3")->storePayloads);
    reader->close();
}
/// Replaces one document through IndexWriter::updateDocument() and checks, at each step, which
/// of the readers obtained from the writer or opened on the directory report isCurrent().
TEST_F(IndexWriterReaderTest, testUpdateDocument) {
    bool optimize = true;

    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    // populate the index, leaving the writer open
    createIndexNoClose(!optimize, L"index1", writer);

    // reader obtained directly from the writer
    IndexReaderPtr r1 = writer->getReader();
    EXPECT_TRUE(r1->isCurrent());

    // remember the id of document 10, then replace that document with a copy whose id is 8000
    String id10 = r1->document(10)->getField(L"id")->stringValue();
    String replacementId = StringUtils::toString(8000);

    DocumentPtr newDoc = r1->document(10);
    newDoc->removeField(L"id");
    newDoc->add(newLucene<Field>(L"id", replacementId, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    writer->updateDocument(newLucene<Term>(L"id", id10), newDoc);
    EXPECT_FALSE(r1->isCurrent());

    // a reader fetched after the update no longer finds the old id and finds the new one once
    IndexReaderPtr r2 = writer->getReader();
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r2));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", replacementId), r2));

    r1->close();
    writer->close();
    EXPECT_TRUE(r2->isCurrent());

    // a reader opened on the directory sees the same state
    IndexReaderPtr r3 = IndexReader::open(dir1, true);
    EXPECT_TRUE(r3->isCurrent());
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r3));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", replacementId), r3));

    // add one more document with a new writer: both readers are expected to stay current
    // until that writer is closed
    writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr extraDoc = newLucene<Document>();
    extraDoc->add(newLucene<Field>(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(extraDoc);
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_TRUE(r3->isCurrent());

    writer->close();

    EXPECT_FALSE(r2->isCurrent());
    EXPECT_FALSE(r3->isCurrent());

    r2->close();
    r3->close();

    dir1->close();
}
    /// For NUM_ITER iterations: adds 200 documents, runs NUM_THREADS OptimizeThread instances
    /// against the shared writer, then checks the document count and that the reopened index
    /// reports itself as optimized. A non-null `merger` is installed as the merge scheduler.
    void runTest(DirectoryPtr directory, MergeSchedulerPtr merger)
    {
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);
        if (merger)
            writer->setMergeScheduler(merger);

        for (int32_t iter = 0; iter < NUM_ITER; ++iter)
        {
            int32_t iterCopy = iter;

            // large merge factor while the 200 base documents are added
            writer->setMergeFactor(1000);

            for (int32_t docId = 0; docId < 200; ++docId)
            {
                DocumentPtr doc = newLucene<Document>();
                doc->add(newLucene<Field>(L"id", StringUtils::toString(docId), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                doc->add(newLucene<Field>(L"contents", intToEnglish(docId), Field::STORE_NO, Field::INDEX_ANALYZED));
                writer->addDocument(doc);
            }

            // small merge factor for the threaded phase
            writer->setMergeFactor(4);

            Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);

            for (int32_t t = 0; t < NUM_THREADS; ++t)
            {
                int32_t threadIndex = t;
                IndexWriterPtr writerCopy = writer;
                threads[t] = newLucene<OptimizeThread>(NUM_ITER2, iterCopy, threadIndex, writer, writerCopy);
            }

            // start every thread before waiting on any of them
            for (int32_t t = 0; t < NUM_THREADS; ++t)
                threads[t]->start();
            for (int32_t t = 0; t < NUM_THREADS; ++t)
                threads[t]->join();

            int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));

            BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

            // reopen the writer on the existing index for the next iteration
            writer->close();
            writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
            writer->setMaxBufferedDocs(2);

            IndexReaderPtr reader = IndexReader::open(directory, true);
            BOOST_CHECK(reader->isOptimized());
            BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
            reader->close();
        }
        writer->close();
    }
Ejemplo n.º 7
0
    /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it.
    void checkAgainstRAMDirectory() {
        StringStream fooField;
        StringStream termField;

        // add up to 250 terms to field "foo"
        int32_t fieldCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < fieldCount; ++i) {
            fooField << L" " << randomTerm();
        }

        // add up to 250 terms to field "foo"
        int32_t termCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < termCount; ++i) {
            termField << L" " << randomTerm();
        }

        RAMDirectoryPtr ramdir = newLucene<RAMDirectory>();
        AnalyzerPtr analyzer = randomAnalyzer();
        IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
        DocumentPtr doc = newLucene<Document>();
        FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        doc->add(field1);
        doc->add(field2);
        writer->addDocument(doc);
        writer->close();

        MemoryIndexPtr memory = newLucene<MemoryIndex>();
        memory->addField(L"foo", fooField.str(), analyzer);
        memory->addField(L"term", termField.str(), analyzer);
        checkAllQueries(memory, ramdir, analyzer);
    }
Ejemplo n.º 8
0
    /// Adds one document per non-empty regular file found by iterating sDir.
    /// Field 0 of each document is the file path and field 1 is the file's full content.
    /// Commits docPool and closes the writer once iteration is finished.
    /// @param sDir directory whose entries are iterated
    void index(const tstring& sDir)
    {
        IndexWriterPtr pIndexWriter = m_pIndex->acquireWriter();

        // NOTE(review): the second argument is presumably "recursive = false" - confirm against DirectoryIterator
        DirectoryIterator di(sDir, false);
        while (di.hasNext())
        {
            const File& f = di.next();
            if (!f.isFile())
                continue;

            BinaryFile bf;
            bf.open(f.getPath().c_str(), BinaryFile::READ);
            if (!bf.isFileOpen())
                continue;

            size_t nRead = (size_t)bf.getLength();
            if (nRead == 0)
                continue; // empty files are not indexed

            DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
            pDoc->addField(0, f.getPath().c_str());

            // One extra byte so the content can be NUL-terminated.
            char* buf = new char[nRead + 1];
            try
            {
                bf.read(buf, nRead);
                buf[nRead] = 0;
                pDoc->addField(1, buf, nRead, false);
            }
            catch (...)
            {
                // Release the buffer if reading or adding the field throws, then propagate.
                delete[] buf;
                throw;
            }
            delete[] buf;

            pIndexWriter->addDocument(pDoc);
        }
        docPool.commit();
        pIndexWriter->close();
    }
Ejemplo n.º 9
0
void DateTimeIndexTestCase::buildDateTimeIndex(const string& sDocs)
{
    try
    {
        DocumentSchema schema;
        schema.addField("DateTime1", "DATETIME_I", true);

        Index index;
        index.open(getIndexPath(), Index::WRITE, &schema);
        IndexWriterPtr pIndexWriter = index.acquireWriter();
        
        StringTokenizer st(sDocs, ";", StringTokenizer::TOKEN_TRIM |
                           StringTokenizer::TOKEN_IGNORE_EMPTY);
        for (StringTokenizer::Iterator it = st.begin(); it != st.end(); ++it)
        {
            DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
            pDoc->addField(0, (*it).c_str());
            pIndexWriter->addDocument(pDoc);
        }

        pIndexWriter->close();
    }
    catch (const FirteXException& )
    {
        CPPUNIT_ASSERT(false);
    }
}
    void createIndex(int32_t numHits) {
        int32_t numDocs = 500;

        DirectoryPtr directory = newLucene<SeekCountingDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        writer->setUseCompoundFile(false);
        writer->setMaxBufferedDocs(10);
        for (int32_t i = 0; i < numDocs; ++i) {
            DocumentPtr doc = newLucene<Document>();
            String content;
            if (i % (numDocs / numHits) == 0) {
                // add a document that matches the query "term1 term2"
                content = term1 + L" " + term2;
            } else if (i % 15 == 0) {
                // add a document that only contains term1
                content = term1 + L" " + term1;
            } else {
                // add a document that contains term2 but not term 1
                content = term3 + L" " + term2;
            }

            doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
            writer->addDocument(doc);
        }

        // make sure the index has only a single segment
        writer->optimize();
        writer->close();

        SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
        searcher = newLucene<IndexSearcher>(reader);
    }
Ejemplo n.º 11
0
/// Indexes the category values "1".."4" and runs the query (1 OR 2) AND NOT 9,
/// expecting exactly two hits.
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";

    RAMDirectoryPtr directory = newLucene<RAMDirectory>();
    Collection<String> values = newCollection<String>(L"1", L"2", L"3", L"4");

    // one document per value, each stored as a single un-analyzed term
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t idx = 0; idx < values.size(); ++idx) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, values[idx], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    // inner query: category:1 OR category:2
    BooleanQueryPtr eitherOneOrTwo = newLucene<BooleanQuery>();
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    // outer query: the inner query is required, category:9 is prohibited
    BooleanQueryPtr query = newLucene<BooleanQuery>();
    query->add(eitherOneOrTwo, BooleanClause::MUST);
    query->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr indexSearcher = newLucene<IndexSearcher>(directory, true);
    Collection<ScoreDocPtr> hits = indexSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, hits.size());
}
 PrefixInBooleanQueryFixture()
 {
     directory = newLucene<RAMDirectory>();
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     
     for (int32_t i = 0; i < 5137; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     
     for (int32_t i = 5138; i < 11377; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     
     writer->close();
 }
/// Indexes ten documents containing "a b" and checks that seeking a TermPositions to "b" and
/// then back to "a" yields every document in order with positions 1 and 0 respectively.
TEST_F(LazyProxSkippingTest, testSeek) {
    DirectoryPtr directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t n = 0; n < 10; ++n) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    IndexReaderPtr reader = IndexReader::open(directory, true);
    TermPositionsPtr positions = reader->termPositions();

    // "b" is the second token of every document, so its position is 1
    positions->seek(newLucene<Term>(field, L"b"));
    for (int32_t docId = 0; docId < 10; ++docId) {
        positions->next();
        EXPECT_EQ(positions->doc(), docId);
        EXPECT_EQ(positions->nextPosition(), 1);
    }

    // seek the same enumerator to "a", the first token (position 0)
    positions->seek(newLucene<Term>(field, L"a"));
    for (int32_t docId = 0; docId < 10; ++docId) {
        positions->next();
        EXPECT_EQ(positions->doc(), docId);
        EXPECT_EQ(positions->nextPosition(), 0);
    }
}
    void createIndex(const DirectoryPtr& dir, bool multiSegment) {
        IndexWriter::unlock(dir);
        IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

        w->setMergePolicy(newLucene<LogDocMergePolicy>(w));

        for (int32_t i = 0; i < 100; ++i) {
            w->addDocument(createDocument(i, 4));
            if (multiSegment && (i % 10) == 0) {
                w->commit();
            }
        }

        if (!multiSegment) {
            w->optimize();
        }

        w->close();

        IndexReaderPtr r = IndexReader::open(dir, false);
        if (multiSegment) {
            EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
        } else {
            EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
        }
        r->close();
    }
/// Adds the same source index to an empty writer five times through addIndexesNoOptimize()
/// and expects a reader obtained from the writer to report maxDoc() == 500.
TEST_F(IndexWriterReaderTest, testAddIndexes2) {
    bool optimize = false;

    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    // source index, built and closed before it is added
    DirectoryPtr dir2 = newLucene<MockRAMDirectory>();
    IndexWriterPtr sourceWriter = newLucene<IndexWriter>(dir2, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    createIndexNoClose(!optimize, L"index2", sourceWriter);
    sourceWriter->close();

    Collection<DirectoryPtr> dirs = newCollection<DirectoryPtr>(dir2);

    // add the source index five times over
    for (int32_t round = 0; round < 5; ++round) {
        writer->addIndexesNoOptimize(dirs);
    }

    IndexReaderPtr r1 = writer->getReader();
    EXPECT_EQ(500, r1->maxDoc());

    r1->close();
    writer->close();
    dir1->close();
}
 void createIndex(const DirectoryPtr& dir) {
     IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED);
     iw->setMaxBufferedDocs(5);
     iw->setMergeFactor(3);
     iw->setSimilarity(similarityOne);
     iw->setUseCompoundFile(true);
     iw->close();
 }
 /// Adds a single document whose stored, analyzed "body" field is `s` to the index in
 /// `ramDir1`. `create` is passed straight through to the IndexWriter constructor.
 void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create)
 {
     IndexWriterPtr writer = newLucene<IndexWriter>(ramDir1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED);

     DocumentPtr document = newLucene<Document>();
     document->add(newLucene<Field>(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED));

     writer->addDocument(document);
     writer->close();
 }
 void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) {
     IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
     iw->setMaxBufferedDocs(5);
     iw->setMergeFactor(3);
     iw->setSimilarity(similarityOne);
     iw->setUseCompoundFile(compound);
     for (int32_t i = 0; i < ndocs; ++i) {
         iw->addDocument(newDoc());
     }
     iw->close();
 }
Ejemplo n.º 19
0
// Stress test: run two indexer threads and two searcher threads against a single index.
static void runTest(DirectoryPtr directory)
{
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));
    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();

    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);

    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Base index of 100 documents, with a commit issued before documents 1, 8, 15, ...
    for (int32_t docId = 0; docId < 100; ++docId)
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"id", StringUtils::toString(docId), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"contents", intToEnglish(docId), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((docId - 1) % 7 == 0)
            writer->commit();
        writer->addDocument(doc);
    }
    writer->commit();

    // all 100 base documents must be visible before the threads start
    IndexReaderPtr baseReader = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, baseReader->numDocs());
    baseReader->close();

    // two indexers sharing the writer
    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    // two searchers working on the directory
    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();

    // wait for all four threads before closing the writer
    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    writer->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}
/// Creates a new index in `dir` holding `numDocs` documents whose stored "count" field
/// takes the values start, start + 1, ..., start + numDocs - 1.
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs)
{
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergeFactor(2);
    writer->setMaxBufferedDocs(2);

    const int32_t end = start + numDocs;
    for (int32_t value = start; value < end; ++value)
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"count", StringUtils::toString(value), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }

    writer->close();
}
Ejemplo n.º 21
0
 TermScorerFixture()
 {
     values = newCollection<String>(L"all", L"dogs dogs", L"like", L"playing", L"fetch", L"all");
     directory = newLucene<RAMDirectory>();
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     for (int32_t i = 0; i < values.size(); ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_ANALYZED));
         writer->addDocument(doc);
     }
     writer->close();
     indexSearcher = newLucene<IndexSearcher>(directory, false);
     indexReader = indexSearcher->getIndexReader();
 }
/// Walks a two-term enumeration ("aaa", "bbb") and checks what prev() and term() report
/// at each step, including after next() has returned false at the end.
TEST_F(SegmentTermEnumTest, testPrevTermAtEnd) {
    DirectoryPtr dir = newLucene<MockRAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    addDoc(indexWriter, L"aaa bbb");
    indexWriter->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(dir);
    SegmentTermEnumPtr terms = boost::dynamic_pointer_cast<SegmentTermEnum>(reader->terms());

    // first term
    EXPECT_TRUE(terms->next());
    EXPECT_EQ(L"aaa", terms->term()->text());

    // second term: prev() now reports the first one
    EXPECT_TRUE(terms->next());
    EXPECT_EQ(L"aaa", terms->prev()->text());
    EXPECT_EQ(L"bbb", terms->term()->text());

    // end of the enumeration: prev() reports the last term
    EXPECT_FALSE(terms->next());
    EXPECT_EQ(L"bbb", terms->prev()->text());
}
 MultiThreadTermVectorsFixture()
 {
     directory = newLucene<RAMDirectory>();
     numDocs = 100;
     numThreads = 3;
     
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     for (int32_t i = 0; i < numDocs; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         FieldablePtr fld = newLucene<Field>(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_YES);
         doc->add(fld);
         writer->addDocument(doc);
     }
     writer->close();
 }
    /// One-time setup to initialise static members: builds an optimized index of noDocs documents
    /// with int NumericFields at several precision steps and opens `searcher` on it.
    void setup() {
        // set the theoretical maximum term count for 8bit (see docs for the number)
        BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255);

        directory = newLucene<RAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthUNLIMITED);

        // stored fields; the second constructor argument is 8, 4, 2 and INT_MAX respectively
        NumericFieldPtr field8 = newLucene<NumericField>(L"field8", 8, Field::STORE_YES, true);
        NumericFieldPtr field4 = newLucene<NumericField>(L"field4", 4, Field::STORE_YES, true);
        NumericFieldPtr field2 = newLucene<NumericField>(L"field2", 2, Field::STORE_YES, true);
        NumericFieldPtr fieldNoTrie = newLucene<NumericField>(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true);

        // unstored "ascending" fields
        NumericFieldPtr ascfield8 = newLucene<NumericField>(L"ascfield8", 8, Field::STORE_NO, true);
        NumericFieldPtr ascfield4 = newLucene<NumericField>(L"ascfield4", 4, Field::STORE_NO, true);
        NumericFieldPtr ascfield2 = newLucene<NumericField>(L"ascfield2", 2, Field::STORE_NO, true);

        // A single Document instance is reused; only the field values change per iteration.
        DocumentPtr doc = newLucene<Document>();

        // fields with a fixed distance between values, to test general functionality
        doc->add(field8);
        doc->add(field4);
        doc->add(field2);
        doc->add(fieldNoTrie);

        // ascending fields with a distance of 1, beginning at -noDocs/2, to test the correct
        // splitting of ranges and inclusive/exclusive bounds
        doc->add(ascfield8);
        doc->add(ascfield4);
        doc->add(ascfield2);

        // index noDocs documents with increasing int values
        for (int32_t docIdx = 0; docIdx < noDocs; ++docIdx) {
            const int32_t spacedValue = distance * docIdx + startOffset;
            field8->setIntValue(spacedValue);
            field4->setIntValue(spacedValue);
            field2->setIntValue(spacedValue);
            fieldNoTrie->setIntValue(spacedValue);

            const int32_t ascendingValue = docIdx - (noDocs / 2);
            ascfield8->setIntValue(ascendingValue);
            ascfield4->setIntValue(ascendingValue);
            ascfield2->setIntValue(ascendingValue);

            writer->addDocument(doc);
        }

        writer->optimize();
        writer->close();
        searcher = newLucene<IndexSearcher>(directory, true);
    }
    /// Indexes `doc` alone in a fresh RAMDirectory, runs `query` with the given slop and checks
    /// that the total hit count equals expectedNumResults.
    /// @return the maxScore reported by the search
    double checkPhraseQuery(DocumentPtr doc, PhraseQueryPtr query, int32_t slop, int32_t expectedNumResults)
    {
        query->setSlop(slop);

        // single-document index
        RAMDirectoryPtr singleDocDir = newLucene<RAMDirectory>();
        WhitespaceAnalyzerPtr whitespace = newLucene<WhitespaceAnalyzer>();
        IndexWriterPtr indexWriter = newLucene<IndexWriter>(singleDocDir, whitespace, IndexWriter::MaxFieldLengthUNLIMITED);
        indexWriter->addDocument(doc);
        indexWriter->close();

        // search for up to 10 results
        IndexSearcherPtr indexSearcher = newLucene<IndexSearcher>(singleDocDir, true);
        TopDocsPtr topDocs = indexSearcher->search(query, FilterPtr(), 10);
        BOOST_CHECK_EQUAL(expectedNumResults, topDocs->totalHits);

        indexSearcher->close();
        singleDocDir->close();

        return topDocs->maxScore;
    }
Ejemplo n.º 26
0
    /// Builds an optimized five-document index in `directory`; each document is produced by
    /// createDocument() from a text and a dateTime value, added from oldest to latest.
    DateSortTest() {
        directory = newLucene<RAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        // text / dateTime pairs, oldest first
        struct DatedText {
            const wchar_t* text;
            long long dateTime;
        };
        static const DatedText entries[] = {
            { L"Document 1", 1192001122000LL }, // Oct 10 03:25:22 EDT 2007 (oldest)
            { L"Document 2", 1192001126000LL }, // Oct 10 03:25:26 EDT 2007
            { L"Document 3", 1192101133000LL }, // Oct 11 07:12:13 EDT 2007
            { L"Document 4", 1192104129000LL }, // Oct 11 08:02:09 EDT 2007
            { L"Document 5", 1192209943000LL }  // Oct 12 13:25:43 EDT 2007 (latest)
        };
        const int32_t entryCount = (int32_t)(sizeof(entries) / sizeof(entries[0]));

        for (int32_t e = 0; e < entryCount; ++e) {
            writer->addDocument(createDocument(entries[e].text, entries[e].dateTime));
        }

        writer->optimize();
        writer->close();
    }
    MultiSearcherRankingFixture()
    {
        // create MultiSearcher from two separate searchers
        DirectoryPtr d1 = newLucene<RAMDirectory>();
        IndexWriterPtr iw1 = newLucene<IndexWriter>(d1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
        addCollection1(iw1);
        iw1->close();
        DirectoryPtr d2 = newLucene<RAMDirectory>();
        IndexWriterPtr iw2 = newLucene<IndexWriter>(d2, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
        addCollection2(iw2);
        iw2->close();

        Collection<SearchablePtr> s = newCollection<SearchablePtr>(newLucene<IndexSearcher>(d1, true), newLucene<IndexSearcher>(d2, true));
        multiSearcher = newLucene<MultiSearcher>(s);

        // create IndexSearcher which contains all documents
        DirectoryPtr d = newLucene<RAMDirectory>();
        IndexWriterPtr iw = newLucene<IndexWriter>(d, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
        addCollection1(iw);
        addCollection2(iw);
        iw->close();
        singleSearcher = newLucene<IndexSearcher>(d, true);
    }
    AddDirectoriesThreads(int32_t numDirs, const IndexWriterPtr& mainWriter) {
        this->numDirs = numDirs;
        this->mainWriter = mainWriter;
        threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        failures = Collection<LuceneException>::newInstance();
        didClose = false;
        count = newLucene<HeavyAtomicInt>(0);
        numAddIndexesNoOptimize = newLucene<HeavyAtomicInt>(0);
        addDir = newLucene<MockRAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(addDir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
        writer->setMaxBufferedDocs(2);
        for (int32_t i = 0; i < NUM_INIT_DOCS; ++i) {
            DocumentPtr doc = createDocument(i, L"addindex", 4);
            writer->addDocument(doc);
        }

        writer->close();

        readers = Collection<IndexReaderPtr>::newInstance(numDirs);
        for (int32_t i = 0; i < numDirs; ++i) {
            readers[i] = IndexReader::open(addDir, false);
        }
    }
 /// Marks this object as closed and closes the main writer.
 /// @param doWait forwarded unchanged to IndexWriter::close()
 void close(bool doWait) {
     // the flag is set before the writer is closed; keep this order
     didClose = true;
     mainWriter->close(doWait);
 }
Ejemplo n.º 30
0
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding.
///
/// Phases, in order:
///  1. Index 2 * numDocs documents with 1-byte payloads, commit, then index
///     numDocs documents whose payload length grows with the document number,
///     and optimize so that merging is exercised.
///  2. Read every payload back through TermPositions and compare the
///     concatenation with the generated payload data.
///  3. Check lazy skipping (positions whose payload is never read, skipTo()).
///  4. Check payload lengths around skip points.
///  5. Check that a second getPayload() without nextPosition() raises an IO error.
///  6. Re-create the index with one document carrying a 1500-byte payload and
///     verify it round-trips.
///
/// @param dir Directory the index is written to; an IndexWriter is opened on it
///            twice (once for phases 1-5 and once for phase 6).
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    // Total bytes consumed by the two indexing loops below:
    //   first loop : 2 * numDocs documents, numTerms bytes each
    //   second loop: document i consumes i * numTerms bytes, i = 0 .. numDocs - 1
    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    // now use a different payload length for every document
    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    // Advance all term enumerations in lock step (every document contains every
    // term) and collect the payloads into verifyPayloadData.
    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();

        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();
    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    // This document was written by the second indexing loop, where the payload
    // length equals the loop index (document number minus the 2 * numDocs
    // documents of the first loop).
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
        // Reaching this line means the forbidden call was accepted. Report it
        // non-fatally so the reader is still closed and the remaining checks run.
        ADD_FAILURE() << "Expected IOException was not thrown by repeated getPayload()";
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    // The expected payload is the 1500-byte window starting at offset 100 that
    // was handed to the analyzer above.
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));

    reader->close();
}