static void setUpDirs(DirectoryPtr dir, DirectoryPtr aux) {
    IndexWriterPtr writer;

    writer = newWriter(dir, true);
    writer->setMaxBufferedDocs(1000);
    // add 1000 documents in 1 segment
    addDocs(writer, 1000);
    BOOST_CHECK_EQUAL(1000, writer->maxDoc());
    BOOST_CHECK_EQUAL(1, writer->getSegmentCount());
    writer->close();

    writer = newWriter(aux, true);
    writer->setUseCompoundFile(false); // use one without a compound file
    writer->setMaxBufferedDocs(100);
    writer->setMergeFactor(10);
    // add 30 documents in 3 segments
    for (int32_t i = 0; i < 3; ++i) {
        addDocs(writer, 10);
        writer->close();
        writer = newWriter(aux, false);
        writer->setUseCompoundFile(false); // use one without a compound file
        writer->setMaxBufferedDocs(100);
        writer->setMergeFactor(10);
    }
    BOOST_CHECK_EQUAL(30, writer->maxDoc());
    BOOST_CHECK_EQUAL(3, writer->getSegmentCount());
    writer->close();
}
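
// The newWriter() and addDocs() helpers used above are not shown here; this is
// a minimal sketch of what they are assumed to look like, inferred from how
// setUpDirs() calls them (the WhitespaceAnalyzer and LogDocMergePolicy choices
// are assumptions, not confirmed by this file).
static IndexWriterPtr newWriter(const DirectoryPtr& dir, bool create) {
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), create, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergePolicy(newLucene<LogDocMergePolicy>(writer));
    return writer;
}

static void addDocs(const IndexWriterPtr& writer, int32_t numDocs) {
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"content", L"aaa", Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
}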
void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) {
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(2);
    if (merger)
        writer->setMergeScheduler(merger);

    for (int32_t iter = 0; iter < NUM_ITER; ++iter) {
        int32_t iterFinal = iter;
        writer->setMergeFactor(1000);

        for (int32_t i = 0; i < 200; ++i) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
            writer->addDocument(d);
        }

        writer->setMergeFactor(4);

        Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            int32_t iFinal = i;
            IndexWriterPtr writerFinal = writer;
            threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->start();
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->join();

        int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

        BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

        writer->close();
        writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);

        IndexReaderPtr reader = IndexReader::open(directory, true);
        BOOST_CHECK(reader->isOptimized());
        BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
        reader->close();
    }
    writer->close();
}
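
// OptimizeThread is defined elsewhere; the sketch below shows what its run()
// loop is assumed to do, because that is where the expectedDocCount formula
// above comes from: per pass, thread i adds 17 * (1 + i) docs and deletes
// 9 * (1 + i) of them, a net gain of 8 * (1 + i); summed over all threads and
// NUM_ITER2 passes this gives 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)
// extra docs per outer iteration. Member names (numIter2, iterFinal, iFinal,
// writerFinal) are assumptions.
virtual void run() {
    for (int32_t j = 0; j < numIter2; ++j) {
        writerFinal->optimize(false);
        for (int32_t k = 0; k < 17 * (1 + iFinal); ++k) {
            DocumentPtr d = newLucene<Document>();
            String id = StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k);
            d->add(newLucene<Field>(L"id", id, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED));
            writerFinal->addDocument(d);
        }
        for (int32_t k = 0; k < 9 * (1 + iFinal); ++k) {
            String id = StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k);
            writerFinal->deleteDocuments(newLucene<Term>(L"id", id));
        }
        writerFinal->optimize();
    }
}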
void createIndex(int32_t numHits) {
    int32_t numDocs = 500;

    DirectoryPtr directory = newLucene<SeekCountingDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setUseCompoundFile(false);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        String content;
        if (i % (numDocs / numHits) == 0) {
            // add a document that matches the query "term1 term2"
            content = term1 + L" " + term2;
        } else if (i % 15 == 0) {
            // add a document that only contains term1
            content = term1 + L" " + term1;
        } else {
            // add a document that contains term2 but not term1
            content = term3 + L" " + term2;
        }
        doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }

    // make sure the index has only a single segment
    writer->optimize();
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
    searcher = newLucene<IndexSearcher>(reader);
}
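
// A hedged sketch of how this fixture is typically driven (performTest and the
// seeksCounter global are assumptions, not shown in this file): search for the
// phrase "term1 term2" and verify both the hit count and that the
// SeekCountingDirectory recorded no more seeks than hits.
void performTest(int32_t numHits) {
    createIndex(numHits);
    seeksCounter = 0; // assumed counter incremented by SeekCountingDirectory streams
    PhraseQueryPtr query = newLucene<PhraseQuery>();
    query->add(newLucene<Term>(field, term1));
    query->add(newLucene<Term>(field, term2));
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs;
    BOOST_CHECK_EQUAL(numHits, hits.size());
    // the number of seeks should not exceed the number of hits
    BOOST_CHECK(seeksCounter > 0 && seeksCounter <= numHits + 1);
}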
void createIndex(const DirectoryPtr& dir) {
    IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, true, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->setSimilarity(similarityOne);
    iw->setUseCompoundFile(true);
    iw->close();
}
// Run two indexers and two searchers against a single index as a stress test.
static void runTest(DirectoryPtr directory) {
    Collection<TimedThreadPtr> threads = Collection<TimedThreadPtr>::newInstance(4);

    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();
    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Establish a base index of 100 docs
    for (int32_t i = 0; i < 100; ++i) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((i - 1) % 7 == 0)
            writer->commit();
        writer->addDocument(d);
    }
    writer->commit();

    IndexReaderPtr r = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, r->numDocs());
    r->close();

    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();

    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    writer->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}
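
// The worker classes above are defined elsewhere in the suite; a plausible
// sketch of their doWork() bodies follows (nextID and the batch sizes are
// assumptions): each indexer repeatedly adds ten documents and deletes five of
// them by id, while each searcher opens and closes an IndexSearcher in a tight
// loop, exercising concurrent reads against an index under modification.
virtual void doWork() {
    // IndexerThread: add 10 docs, then delete 5 of them by id
    for (int32_t j = 0; j < 10; ++j) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(nextID++), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(nextID), Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(d);
    }
    int32_t deleteID = nextID - 1;
    for (int32_t j = 0; j < 5; ++j) {
        writer->deleteDocuments(newLucene<Term>(L"id", StringUtils::toString(deleteID)));
        deleteID -= 2;
    }
}

virtual void doWork() {
    // SearcherThread: repeatedly open and close a searcher over the live index
    for (int32_t i = 0; i < 100; ++i)
        newLucene<IndexSearcher>(directory, true)->close();
}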
void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) {
    IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->setSimilarity(similarityOne);
    iw->setUseCompoundFile(compound);
    for (int32_t i = 0; i < ndocs; ++i)
        iw->addDocument(newDoc());
    iw->close();
}
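
// newDoc() is assumed to look roughly like this (the field names and the
// nextNorm() boost source are assumptions): every field in the document gets
// the same boost, so each added doc carries one known norm value that the
// norms tests can later verify.
DocumentPtr newDoc() {
    DocumentPtr d = newLucene<Document>();
    double boost = nextNorm(); // hypothetical helper returning the next expected norm
    for (int32_t i = 0; i < 10; ++i) {
        FieldPtr f = newLucene<Field>(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
        f->setBoost(boost);
        d->add(f);
    }
    return d;
}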
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs) {
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergeFactor(2);
    writer->setMaxBufferedDocs(2);

    for (int32_t i = start; i < start + numDocs; ++i) {
        DocumentPtr temp = newLucene<Document>();
        temp->add(newLucene<Field>(L"count", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(temp);
    }
    writer->close();
}
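
// A sketch of how fillIndex() is typically used (the surrounding test body is
// an assumption): fill two directories with consecutive halves of a doc range,
// then merge them into a third index and verify the combined result.
DirectoryPtr indexA = newLucene<MockRAMDirectory>();
DirectoryPtr indexB = newLucene<MockRAMDirectory>();
fillIndex(indexA, 0, 100);   // docs 0..99
fillIndex(indexB, 100, 100); // docs 100..199

DirectoryPtr merged = newLucene<MockRAMDirectory>();
IndexWriterPtr writer = newLucene<IndexWriter>(merged, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
writer->setMergeFactor(2);
writer->addIndexesNoOptimize(newCollection<DirectoryPtr>(indexA, indexB));
writer->optimize();
writer->close();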
AddDirectoriesThreads(int32_t numDirs, const IndexWriterPtr& mainWriter) {
    this->numDirs = numDirs;
    this->mainWriter = mainWriter;
    threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
    failures = Collection<LuceneException>::newInstance();
    didClose = false;
    count = newLucene<HeavyAtomicInt>(0);
    numAddIndexesNoOptimize = newLucene<HeavyAtomicInt>(0);

    addDir = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(addDir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(2);
    for (int32_t i = 0; i < NUM_INIT_DOCS; ++i) {
        DocumentPtr doc = createDocument(i, L"addindex", 4);
        writer->addDocument(doc);
    }
    writer->close();

    readers = Collection<IndexReaderPtr>::newInstance(numDirs);
    for (int32_t i = 0; i < numDirs; ++i)
        readers[i] = IndexReader::open(addDir, false);
}
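
// createDocument() is defined elsewhere; a plausible sketch, inferred from the
// call above (the exact field contents are assumptions): an id, the index
// name, and numFields analyzed body fields over growing text.
DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    doc->add(newLucene<Field>(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    StringStream sb;
    sb << L"a" << n;
    doc->add(newLucene<Field>(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
    sb << L" b" << n;
    for (int32_t i = 1; i < numFields; ++i)
        doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
    return doc;
}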
/// Test that norms values are preserved as the index is maintained, including
/// separate norms, merging indexes with separate norms, and optimize.
TEST_F(IndexReaderCloneNormsTest, testNorms) {
    // test with a single index: index1
    String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1"));
    DirectoryPtr dir1 = FSDirectory::open(indexDir1);
    IndexWriter::unlock(dir1);

    norms = Collection<double>::newInstance();
    modifiedNorms = Collection<double>::newInstance();

    createIndex(dir1);
    doTestNorms(dir1);

    // test with a single index: index2
    Collection<double> norms1 = norms;
    Collection<double> modifiedNorms1 = modifiedNorms;
    int32_t numDocNorms1 = numDocNorms;

    norms = Collection<double>::newInstance();
    modifiedNorms = Collection<double>::newInstance();
    numDocNorms = 0;

    String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2"));
    DirectoryPtr dir2 = FSDirectory::open(indexDir2);

    createIndex(dir2);
    doTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3"));
    DirectoryPtr dir3 = FSDirectory::open(indexDir3);

    createIndex(dir3);
    IndexWriterPtr iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir1, dir2));
    iw->optimize();
    iw->close();

    norms1.addAll(norms.begin(), norms.end());
    norms = norms1;
    modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end());
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    verifyIndex(dir3);
    doTestNorms(dir3);

    // now with optimize
    iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->optimize();
    iw->close();
    verifyIndex(dir3);

    dir1->close();
    dir2->close();
    dir3->close();
}