TEST_F(IndexWriterReaderTest, testAddIndexesAndDoDeletesThreads) {
    int32_t numIter = 5;
    int32_t numDirs = 3;

    DirectoryPtr mainDir = newLucene<MockRAMDirectory>();
    IndexWriterPtr mainWriter = newLucene<IndexWriter>(mainDir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    AddDirectoriesThreadsPtr addDirThreads = newLucene<AddDirectoriesThreads>(numIter, mainWriter);
    addDirThreads->launchThreads(numDirs);
    addDirThreads->joinThreads();

    EXPECT_EQ(addDirThreads->count->intValue(), addDirThreads->mainWriter->numDocs());

    addDirThreads->close(true);

    EXPECT_TRUE(addDirThreads->failures.empty());

    checkIndex(mainDir);

    IndexReaderPtr reader = IndexReader::open(mainDir, true);
    EXPECT_EQ(addDirThreads->count->intValue(), reader->numDocs());
    reader->close();

    addDirThreads->closeDir();
    mainDir->close();
}
    virtual void doWork()
    {
        IndexReaderPtr r = IndexReader::open(directory, true);
        if (r->numDocs() != 100)
            BOOST_FAIL("num docs failure");
        r->close();
    }
/// Deletes documents from an index that contain a given term.
int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::wcout << L"Usage: deletefiles.exe <lucene index dir> <unique_term>\n";
        return 1;
    }

    try {
        DirectoryPtr directory = FSDirectory::open(StringUtils::toUnicode(argv[1]));

        // we don't want read-only because we are about to delete
        IndexReaderPtr reader = IndexReader::open(directory, false);

        TermPtr term = newLucene<Term>(L"path", StringUtils::toUnicode(argv[2]));
        int32_t deleted = reader->deleteDocuments(term);

        std::wcout << L"Deleted " << deleted << L" documents containing " << term->toString() << L"\n";

        reader->close();
        directory->close();
    } catch (LuceneException& e) {
        std::wcout << L"Exception: " << e.getError() << L"\n";
        return 1;
    }

    return 0;
}
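// A hypothetical invocation (the path and term below are made up for
// illustration): indexes that store each file's path in a "path" field can
// drop a single file this way, since the path is unique per document.
//
//     deletefiles.exe /data/index /home/user/docs/report.txt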
static void verifyNumDocs(DirectoryPtr dir, int32_t numDocs)
{
    IndexReaderPtr reader = IndexReader::open(dir, true);
    BOOST_CHECK_EQUAL(reader->maxDoc(), numDocs);
    BOOST_CHECK_EQUAL(reader->numDocs(), numDocs);
    reader->close();
}
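// Why the helper above checks both counts: numDocs() excludes deleted documents,
// while maxDoc() keeps counting them until segments merge. A minimal sketch of
// the two values diverging (the helper name is illustrative; assumes the index
// holds at least one document and has no prior deletions):
static void sketchMaxDocVsNumDocs(DirectoryPtr dir)
{
    IndexReaderPtr reader = IndexReader::open(dir, false); // writable so we can delete
    int32_t live = reader->numDocs();
    int32_t ceiling = reader->maxDoc();
    reader->deleteDocument(0);                      // mark document 0 deleted
    BOOST_CHECK_EQUAL(reader->numDocs(), live - 1); // live count drops immediately
    BOOST_CHECK_EQUAL(reader->maxDoc(), ceiling);   // maxDoc shrinks only on merge
    reader->close();                                // note: this commits the deletion
}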
    void createIndex(const DirectoryPtr& dir, bool multiSegment) {
        IndexWriter::unlock(dir);
        IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

        w->setMergePolicy(newLucene<LogDocMergePolicy>(w));

        for (int32_t i = 0; i < 100; ++i) {
            w->addDocument(createDocument(i, 4));
            if (multiSegment && (i % 10) == 0) {
                w->commit();
            }
        }

        if (!multiSegment) {
            w->optimize();
        }

        w->close();

        IndexReaderPtr r = IndexReader::open(dir, false);
        if (multiSegment) {
            EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
        } else {
            EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
        }
        r->close();
    }
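    // createDocument() is not shown in this excerpt; from how the test consumes
    // it (an id plus a handful of indexed fields), a plausible sketch -- the
    // field names here are assumptions, not the suite's actual helper -- might be:
    DocumentPtr createDocument(int32_t n, int32_t numFields) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        for (int32_t i = 0; i < numFields; ++i) {
            doc->add(newLucene<Field>(L"field" + StringUtils::toString(i), L"aaa " + StringUtils::toString(n), Field::STORE_YES, Field::INDEX_ANALYZED));
        }
        return doc;
    }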
static IndexReaderPtr refreshReader(IndexReaderPtr reader)
{
    IndexReaderPtr oldReader = reader;
    reader = reader->reopen();
    if (reader != oldReader)
        oldReader->close();
    return reader;
}
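// The helper above lets callers hold a single reader variable across index
// changes: reopen() shares unchanged segments, so a refresh is much cheaper
// than a full IndexReader::open(). A usage sketch (function name is illustrative):
static void refreshExample(DirectoryPtr dir)
{
    IndexReaderPtr reader = IndexReader::open(dir, true);
    // ... documents are added or deleted by a writer elsewhere ...
    reader = refreshReader(reader); // returns the same object if nothing changed
    reader->close();
}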
static void verifyTermDocs(DirectoryPtr dir, TermPtr term, int32_t numDocs)
{
    IndexReaderPtr reader = IndexReader::open(dir, true);
    TermDocsPtr termDocs = reader->termDocs(term);
    int32_t count = 0;
    while (termDocs->next())
        ++count;
    BOOST_CHECK_EQUAL(count, numDocs);
    reader->close();
}
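// Usage sketch (the term text and expected count are illustrative):
//     verifyTermDocs(dir, newLucene<Term>(L"contents", L"aaa"), 100);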
// Run two indexers and two searchers against a single index as a stress test.
static void runTest(DirectoryPtr directory)
{
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));
    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();
    
    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);
    
    // Establish a base index of 100 docs
    for (int32_t i = 0; i < 100; ++i)
    {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((i - 1) % 7 == 0)
            writer->commit();
        writer->addDocument(d);
    }
    writer->commit();
    
    IndexReaderPtr r = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, r->numDocs());
    r->close();

    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();
    
    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();
    
    writer->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}
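// IndexerThread and SearcherThread are defined elsewhere in the suite; the
// searcher side likely resembles the doWork() fragment shown earlier. A
// plausible indexer loop -- every name below is an assumption, not the suite's
// actual class -- would keep updating documents and record any unexpected
// exception in a failed flag for the checks above:
class SketchIndexerThread : public LuceneThread
{
public:
    SketchIndexerThread(const IndexWriterPtr& writer) : writer(writer), failed(false) {}

    IndexWriterPtr writer;
    bool failed;

    virtual void run()
    {
        try
        {
            for (int32_t i = 0; i < 100; ++i)
            {
                DocumentPtr d = newLucene<Document>();
                d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                // replace the previous document carrying this id, if any
                writer->updateDocument(newLucene<Term>(L"id", StringUtils::toString(i)), d);
            }
        }
        catch (...)
        {
            failed = true; // surfaced by the BOOST_CHECK(!...->failed) assertions
        }
    }
};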
    void doTestUndeleteAll()
    {
        sis->read(dir);
        IndexReaderPtr reader = openReader();
        BOOST_CHECK(reader);
        BOOST_CHECK_EQUAL(2, reader->numDocs());
        reader->deleteDocument(0);
        BOOST_CHECK_EQUAL(1, reader->numDocs());
        reader->undeleteAll();
        BOOST_CHECK_EQUAL(2, reader->numDocs());

        // Ensure undeleteAll survives commit/close/reopen
        reader->commit(MapStringString());
        reader->close();
        
        if (boost::dynamic_pointer_cast<MultiReader>(reader))
        {
            // MultiReader does not "own" the directory so it does not write the changes to sis on commit
            sis->commit(dir);
        }
        
        sis->read(dir);
        reader = openReader();
        BOOST_CHECK_EQUAL(2, reader->numDocs());

        reader->deleteDocument(0);
        BOOST_CHECK_EQUAL(1, reader->numDocs());
        reader->commit(MapStringString());
        reader->close();
        
        if (boost::dynamic_pointer_cast<MultiReader>(reader))
        {
            // MultiReader does not "own" the directory so it does not write the changes to sis on commit
            sis->commit(dir);
        }
        
        sis->read(dir);
        reader = openReader();
        BOOST_CHECK_EQUAL(1, reader->numDocs());
    }
    void runTest(DirectoryPtr directory, MergeSchedulerPtr merger)
    {
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);
        if (merger)
            writer->setMergeScheduler(merger);

        for (int32_t iter = 0; iter < NUM_ITER; ++iter)
        {
            int32_t iterFinal = iter;

            writer->setMergeFactor(1000);

            for (int32_t i = 0; i < 200; ++i)
            {
                DocumentPtr d = newLucene<Document>();
                d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
                writer->addDocument(d);
            }

            writer->setMergeFactor(4);

            Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);

            for (int32_t i = 0; i < NUM_THREADS; ++i)
            {
                int32_t iFinal = i;
                IndexWriterPtr writerFinal = writer;
                threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
            }

            for (int32_t i = 0; i < NUM_THREADS; ++i)
                threads[i]->start();
            for (int32_t i = 0; i < NUM_THREADS; ++i)
                threads[i]->join();

            int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));

            BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

            writer->close();
            writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
            writer->setMaxBufferedDocs(2);

            IndexReaderPtr reader = IndexReader::open(directory, true);
            BOOST_CHECK(reader->isOptimized());
            BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
            reader->close();
        }
        writer->close();
    }
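    // How the expectedDocCount formula above works (a reading of the arithmetic,
    // assuming each OptimizeThread j adds 8 * NUM_ITER2 * (1 + j) documents per
    // iteration): summing (1 + j) over j = 0..NUM_THREADS-1 gives
    // NUM_THREADS * (1 + NUM_THREADS) / 2, so one iteration contributes
    // 200 + 8 * NUM_ITER2 * NUM_THREADS * (1 + NUM_THREADS) / 2 documents, and
    // after (1 + iter) iterations the writer should hold (1 + iter) times that.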
// Scans every stored document and compares its "count" field against the
// expected sequence; note the inverted return value: true means the check failed.
static bool verifyIndex(DirectoryPtr directory, int32_t startAt)
{
    bool fail = false;
    IndexReaderPtr reader = IndexReader::open(directory, true);

    int32_t max = reader->maxDoc();
    for (int32_t i = 0; i < max; ++i)
    {
        DocumentPtr temp = reader->document(i);
        if (temp->getField(L"count")->stringValue() != StringUtils::toString(i + startAt))
            fail = true;
    }
    reader->close();
    return fail;
}
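// Because of the inverted return value, call sites assert on the result being
// false, e.g. (illustrative):
//     BOOST_CHECK(!verifyIndex(directory, 0));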
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

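    // Expected total payload bytes: the first loop below adds 2 * numDocs
    // documents whose numTerms payloads are 1 byte each (numTerms * numDocs * 2),
    // and the second loop adds numDocs documents where document i carries i-byte
    // payloads, contributing numTerms * (0 + 1 + ... + (numDocs - 1))
    // = numTerms * numDocs * (numDocs - 1) / 2.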
    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();

        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();
    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
        FAIL() << "expected an IO exception on a repeated getPayload() call";
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));

    reader->close();
}
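// generateTerms() and generateRandomData() are helpers defined elsewhere in the
// test file. A minimal sketch of the data generator (the RNG choice is an
// assumption; <cstdlib> is assumed included):
static ByteArray generateRandomData(int32_t n)
{
    ByteArray data(ByteArray::newInstance(n));
    for (int32_t i = 0; i < n; ++i)
        data[i] = (uint8_t)(std::rand() & 0xff); // fill with arbitrary byte values
    return data;
}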
    void checkSkipTo(int32_t indexDivisor) {
        DirectoryPtr dir = newLucene<RAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        TermPtr ta = newLucene<Term>(L"content", L"aaa");
        for (int32_t i = 0; i < 10; ++i) {
            addDoc(writer, L"aaa aaa aaa aaa");
        }

        TermPtr tb = newLucene<Term>(L"content", L"bbb");
        for (int32_t i = 0; i < 16; ++i) {
            addDoc(writer, L"bbb bbb bbb bbb");
        }

        TermPtr tc = newLucene<Term>(L"content", L"ccc");
        for (int32_t i = 0; i < 50; ++i) {
            addDoc(writer, L"ccc ccc ccc ccc");
        }

        // ensure we end up with a single segment
        writer->optimize();
        writer->close();

        IndexReaderPtr reader = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, indexDivisor);

        TermDocsPtr tdocs = reader->termDocs();

        // fewer than skipInterval documents, so no skip-list optimization (assumes skipInterval == 16)

        // with next
        tdocs->seek(ta);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(0, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(1, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(0));
        EXPECT_EQ(2, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(4));
        EXPECT_EQ(4, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(9));
        EXPECT_EQ(9, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(10));

        // without next
        tdocs->seek(ta);
        EXPECT_TRUE(tdocs->skipTo(0));
        EXPECT_EQ(0, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(4));
        EXPECT_EQ(4, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(9));
        EXPECT_EQ(9, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(10));

        // exactly skipInterval documents and therefore with optimization

        // with next
        tdocs->seek(tb);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(10, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(11, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(12, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(15));
        EXPECT_EQ(15, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(24));
        EXPECT_EQ(24, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(25));
        EXPECT_EQ(25, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(26));

        // without next
        tdocs->seek(tb);
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(10, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(15));
        EXPECT_EQ(15, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(24));
        EXPECT_EQ(24, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(25));
        EXPECT_EQ(25, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(26));

        // much more than skipInterval documents and therefore with optimization

        // with next
        tdocs->seek(tc);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(26, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(27, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(28, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(40));
        EXPECT_EQ(40, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(57));
        EXPECT_EQ(57, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(74));
        EXPECT_EQ(74, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(75));
        EXPECT_EQ(75, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(76));

        // without next
        tdocs->seek(tc);
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(26, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(40));
        EXPECT_EQ(40, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(57));
        EXPECT_EQ(57, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(74));
        EXPECT_EQ(74, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(75));
        EXPECT_EQ(75, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(76));

        tdocs->close();
        reader->close();
        dir->close();
    }
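    // addDoc() is the suite's helper; given how it is used above (every term
    // queried later lives in the "content" field), a faithful sketch would be:
    void addDoc(const IndexWriterPtr& writer, const String& value) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }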
    void verifyIndex(const DirectoryPtr& dir) {
        IndexReaderPtr ir = IndexReader::open(dir, false);
        verifyIndex(ir);
        ir->close();
    }