/// Prepare the two test directories: a primary index with 1000 docs in one
/// segment, and an auxiliary index with 30 docs spread over 3 segments
/// (stored without compound files).
static void setUpDirs(DirectoryPtr dir, DirectoryPtr aux) {
    // Primary index: one big buffered flush => exactly one segment.
    IndexWriterPtr w = newWriter(dir, true);
    w->setMaxBufferedDocs(1000);
    addDocs(w, 1000);
    BOOST_CHECK_EQUAL(1000, w->maxDoc());
    BOOST_CHECK_EQUAL(1, w->getSegmentCount());
    w->close();

    // Auxiliary index: close/reopen between batches so each 10-doc batch
    // becomes its own segment; compound files disabled throughout.
    w = newWriter(aux, true);
    w->setUseCompoundFile(false);
    w->setMaxBufferedDocs(100);
    w->setMergeFactor(10);
    for (int32_t batch = 0; batch < 3; ++batch) {
        addDocs(w, 10);
        w->close();
        w = newWriter(aux, false);
        w->setUseCompoundFile(false);
        w->setMaxBufferedDocs(100);
        w->setMergeFactor(10);
    }
    BOOST_CHECK_EQUAL(30, w->maxDoc());
    BOOST_CHECK_EQUAL(3, w->getSegmentCount());
    w->close();
}
/// Stress the given merge scheduler: repeatedly bulk-add documents, then run
/// NUM_THREADS OptimizeThreads concurrently, and verify the resulting doc
/// count and that the index ends up optimized each iteration.
/// @param directory index directory under test
/// @param merger    scheduler to install on the writer; may be null (default scheduler)
void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) {
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(2);
    if (merger)
        writer->setMergeScheduler(merger);
    for (int32_t iter = 0; iter < NUM_ITER; ++iter) {
        int32_t iterFinal = iter; // snapshot passed into each worker thread
        // High merge factor while bulk-loading so merges don't interleave here.
        writer->setMergeFactor(1000);
        for (int32_t i = 0; i < 200; ++i) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
            writer->addDocument(d);
        }
        // Low merge factor during the threaded phase to force merge activity.
        writer->setMergeFactor(4);
        Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            int32_t iFinal = i;                 // per-thread index snapshot
            IndexWriterPtr writerFinal = writer; // shared writer handed to the thread
            threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->start();
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->join();
        // Expected total: 200 docs per completed iteration plus the docs the
        // OptimizeThreads add (8 * NUM_ITER2 per thread-pair, summed via the
        // arithmetic series NUM_THREADS/2 * (1 + NUM_THREADS)).
        int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));
        BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());
        // Reopen the writer in append mode before verifying with a reader.
        writer->close();
        writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);
        IndexReaderPtr reader = IndexReader::open(directory, true);
        BOOST_CHECK(reader->isOptimized()); // the worker threads end with an optimize
        BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
        reader->close();
    }
    writer->close();
}
void createIndex(const DirectoryPtr& dir) { IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(true); iw->close(); }
// Run one indexer and 2 searchers against single index as stress test. static void runTest(DirectoryPtr directory) { Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4)); AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>(); IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED); writer->setMaxBufferedDocs(7); writer->setMergeFactor(3); // Establish a base index of 100 docs for (int32_t i = 0; i < 100; ++i) { DocumentPtr d = newLucene<Document>(); d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED)); if ((i - 1) % 7 == 0) writer->commit(); writer->addDocument(d); } writer->commit(); IndexReaderPtr r = IndexReader::open(directory, true); BOOST_CHECK_EQUAL(100, r->numDocs()); r->close(); IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer); threads[0] = indexerThread1; indexerThread1->start(); IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer); threads[1] = indexerThread2; indexerThread2->start(); SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory); threads[2] = searcherThread1; searcherThread1->start(); SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory); threads[3] = searcherThread2; searcherThread2->start(); indexerThread1->join(); indexerThread2->join(); searcherThread1->join(); searcherThread2->join(); writer->close(); BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1 BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2 BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1 BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2 }
void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) { IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->setSimilarity(similarityOne); iw->setUseCompoundFile(compound); for (int32_t i = 0; i < ndocs; ++i) { iw->addDocument(newDoc()); } iw->close(); }
/// Create a new index in dir containing numDocs documents whose "count"
/// field holds the values start .. start+numDocs-1.
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs) {
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    // Tiny buffers/merge factor so even small indexes exercise merging.
    writer->setMergeFactor(2);
    writer->setMaxBufferedDocs(2);
    int32_t end = start + numDocs;
    for (int32_t docNum = start; docNum < end; ++docNum) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"count", StringUtils::toString(docNum), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
}
/// Test that norms values are preserved as the index is maintained. Including separate norms. /// Including merging indexes with separate norms. Including optimize. TEST_F(IndexReaderCloneNormsTest, testNorms) { // test with a single index: index1 String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1")); DirectoryPtr dir1 = FSDirectory::open(indexDir1); IndexWriter::unlock(dir1); norms = Collection<double>::newInstance(); modifiedNorms = Collection<double>::newInstance(); createIndex(dir1); doTestNorms(dir1); // test with a single index: index2 Collection<double> norms1 = norms; Collection<double> modifiedNorms1 = modifiedNorms; int32_t numDocNorms1 = numDocNorms; norms = Collection<double>::newInstance(); modifiedNorms = Collection<double>::newInstance(); numDocNorms = 0; String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2")); DirectoryPtr dir2 = FSDirectory::open(indexDir2); createIndex(dir2); doTestNorms(dir2); // add index1 and index2 to a third index: index3 String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3")); DirectoryPtr dir3 = FSDirectory::open(indexDir3); createIndex(dir3); IndexWriterPtr iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir1, dir2)); iw->optimize(); iw->close(); norms1.addAll(norms.begin(), norms.end()); norms = norms1; modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end()); modifiedNorms = modifiedNorms1; numDocNorms += numDocNorms1; // test with index3 verifyIndex(dir3); doTestNorms(dir3); // now with optimize iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED); iw->setMaxBufferedDocs(5); iw->setMergeFactor(3); iw->optimize(); iw->close(); verifyIndex(dir3); dir1->close(); dir2->close(); dir3->close(); }