TEST_F(IndexWriterReaderTest, testAddIndexesAndDoDeletesThreads) {
    int32_t numIter = 5;
    int32_t numDirs = 3;

    DirectoryPtr mainDir = newLucene<MockRAMDirectory>();
    IndexWriterPtr mainWriter = newLucene<IndexWriter>(mainDir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    AddDirectoriesThreadsPtr addDirThreads = newLucene<AddDirectoriesThreads>(numIter, mainWriter);
    addDirThreads->launchThreads(numDirs);
    addDirThreads->joinThreads();

    EXPECT_EQ(addDirThreads->count->intValue(), addDirThreads->mainWriter->numDocs());

    addDirThreads->close(true);

    EXPECT_TRUE(addDirThreads->failures.empty());

    checkIndex(mainDir);

    IndexReaderPtr reader = IndexReader::open(mainDir, true);
    EXPECT_EQ(addDirThreads->count->intValue(), reader->numDocs());
    reader->close();

    addDirThreads->closeDir();
    mainDir->close();
}
virtual void doWork() {
    IndexReaderPtr r = IndexReader::open(directory, true);
    if (r->numDocs() != 100) {
        BOOST_FAIL("num docs failure");
    }
    r->close();
}
/// Deletes documents from an index that contain the given term.
int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::wcout << L"Usage: deletefiles.exe <lucene index dir> <unique_term>\n";
        return 1;
    }
    try {
        DirectoryPtr directory = FSDirectory::open(StringUtils::toUnicode(argv[1]));

        // we don't want read-only because we are about to delete
        IndexReaderPtr reader = IndexReader::open(directory, false);

        TermPtr term = newLucene<Term>(L"path", StringUtils::toUnicode(argv[2]));
        int32_t deleted = reader->deleteDocuments(term);

        std::wcout << L"Deleted " << deleted << L" documents containing " << term->toString() << L"\n";

        reader->close();
        directory->close();
    } catch (LuceneException& e) {
        std::wcout << L"Exception: " << e.getError() << L"\n";
        return 1;
    }
    return 0;
}
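// For comparison, a minimal sketch of the same deletion done through IndexWriter,
// which is the usual write path; the field name L"path" and the helper name are
// illustrative assumptions, not part of the demo above.
static void deleteByTermViaWriter(const DirectoryPtr& directory, const String& uniqueTerm) {
    // opening an existing index, so create == false
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    // the delete is buffered and applied on commit/close
    writer->deleteDocuments(newLucene<Term>(L"path", uniqueTerm));
    writer->close();
}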
static void verifyNumDocs(DirectoryPtr dir, int32_t numDocs) {
    IndexReaderPtr reader = IndexReader::open(dir, true);
    BOOST_CHECK_EQUAL(reader->maxDoc(), numDocs);
    BOOST_CHECK_EQUAL(reader->numDocs(), numDocs);
    reader->close();
}
void createIndex(const DirectoryPtr& dir, bool multiSegment) {
    IndexWriter::unlock(dir);
    IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    w->setMergePolicy(newLucene<LogDocMergePolicy>(w));
    for (int32_t i = 0; i < 100; ++i) {
        w->addDocument(createDocument(i, 4));
        if (multiSegment && (i % 10) == 0) {
            w->commit();
        }
    }
    if (!multiSegment) {
        w->optimize();
    }
    w->close();

    IndexReaderPtr r = IndexReader::open(dir, false);
    if (multiSegment) {
        EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
    } else {
        EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
    }
    r->close();
}
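// A minimal usage sketch, assuming createIndex() and the verifyNumDocs() helper
// above are visible from the same test file: build a multi-segment index in RAM
// and confirm its document count.
DirectoryPtr dir = newLucene<RAMDirectory>();
createIndex(dir, true);  // commits every 10 docs, so several segments
verifyNumDocs(dir, 100); // all 100 docs present, none deleted
dir->close();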
static IndexReaderPtr refreshReader(IndexReaderPtr reader) {
    IndexReaderPtr oldReader = reader;
    reader = reader->reopen();
    if (reader != oldReader) {
        oldReader->close();
    }
    return reader;
}
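// A sketch of the reopen idiom this helper wraps, assuming the index is being
// modified and committed by another component: reopen() returns the *same*
// reader when nothing changed, so only a genuinely new reader requires closing
// the old one. The function name and loop are illustrative, not from the tests.
static void searchLoop(const DirectoryPtr& dir) {
    IndexReaderPtr reader = IndexReader::open(dir, true);
    for (int32_t pass = 0; pass < 10; ++pass) {
        // ... index is modified and committed elsewhere here ...
        reader = refreshReader(reader); // no-op if the index is unchanged
    }
    reader->close();
}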
static void verifyTermDocs(DirectoryPtr dir, TermPtr term, int32_t numDocs) {
    IndexReaderPtr reader = IndexReader::open(dir, true);
    TermDocsPtr termDocs = reader->termDocs(term);
    int32_t count = 0;
    while (termDocs->next()) {
        ++count;
    }
    BOOST_CHECK_EQUAL(count, numDocs);
    reader->close();
}
// Run one indexer and 2 searchers against a single index as a stress test.
static void runTest(DirectoryPtr directory) {
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));
    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();
    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Establish a base index of 100 docs
    for (int32_t i = 0; i < 100; ++i) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((i - 1) % 7 == 0) {
            writer->commit();
        }
        writer->addDocument(d);
    }
    writer->commit();

    IndexReaderPtr r = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, r->numDocs());
    r->close();

    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();

    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    writer->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}
void doTestUndeleteAll() {
    sis->read(dir);
    IndexReaderPtr reader = openReader();
    BOOST_CHECK(reader);
    BOOST_CHECK_EQUAL(2, reader->numDocs());
    reader->deleteDocument(0);
    BOOST_CHECK_EQUAL(1, reader->numDocs());
    reader->undeleteAll();
    BOOST_CHECK_EQUAL(2, reader->numDocs());

    // Ensure undeleteAll survives commit/close/reopen
    reader->commit(MapStringString());
    reader->close();

    if (boost::dynamic_pointer_cast<MultiReader>(reader)) {
        // MultiReader does not "own" the directory so it does not write the changes to sis on commit
        sis->commit(dir);
    }

    sis->read(dir);
    reader = openReader();
    BOOST_CHECK_EQUAL(2, reader->numDocs());

    reader->deleteDocument(0);
    BOOST_CHECK_EQUAL(1, reader->numDocs());
    reader->commit(MapStringString());
    reader->close();

    if (boost::dynamic_pointer_cast<MultiReader>(reader)) {
        // MultiReader does not "own" the directory so it does not write the changes to sis on commit
        sis->commit(dir);
    }

    sis->read(dir);
    reader = openReader();
    BOOST_CHECK_EQUAL(1, reader->numDocs());
}
void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) {
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(2);
    if (merger) {
        writer->setMergeScheduler(merger);
    }

    for (int32_t iter = 0; iter < NUM_ITER; ++iter) {
        int32_t iterFinal = iter;
        writer->setMergeFactor(1000);

        for (int32_t i = 0; i < 200; ++i) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
            writer->addDocument(d);
        }

        writer->setMergeFactor(4);

        Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            int32_t iFinal = i;
            IndexWriterPtr writerFinal = writer;
            threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            threads[i]->start();
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            threads[i]->join();
        }

        int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));
        BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

        writer->close();
        writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);

        IndexReaderPtr reader = IndexReader::open(directory, true);
        BOOST_CHECK(reader->isOptimized());
        BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
        reader->close();
    }
    writer->close();
}
// Returns true if verification failed, i.e. any "count" field is out of sequence.
static bool verifyIndex(DirectoryPtr directory, int32_t startAt) {
    bool fail = false;
    IndexReaderPtr reader = IndexReader::open(directory, true);
    int32_t max = reader->maxDoc();
    for (int32_t i = 0; i < max; ++i) {
        DocumentPtr temp = reader->document(i);
        if (temp->getField(L"count")->stringValue() != StringUtils::toString(i + startAt)) {
            fail = true;
        }
    }
    reader->close();
    return fail;
}
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";
    int32_t numDocs = skipInterval + 1;

    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();
        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();

    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));

    reader->close();
}
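// A minimal reader-side sketch of the payload API exercised above, assuming an
// index whose postings carry payloads; the function name is illustrative.
// isPayloadAvailable() guards the single getPayload() call allowed per position.
static void dumpFirstPayload(const DirectoryPtr& dir, const TermPtr& term) {
    IndexReaderPtr reader = IndexReader::open(dir, true);
    TermPositionsPtr tp = reader->termPositions(term);
    if (tp->next() && tp->freq() > 0) {
        tp->nextPosition();
        if (tp->isPayloadAvailable()) {
            ByteArray data = tp->getPayload(ByteArray::newInstance(tp->getPayloadLength()), 0);
            std::wcout << L"payload length: " << data.size() << L"\n";
        }
    }
    tp->close();
    reader->close();
}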
void checkSkipTo(int32_t indexDivisor) {
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    TermPtr ta = newLucene<Term>(L"content", L"aaa");
    for (int32_t i = 0; i < 10; ++i) {
        addDoc(writer, L"aaa aaa aaa aaa");
    }

    TermPtr tb = newLucene<Term>(L"content", L"bbb");
    for (int32_t i = 0; i < 16; ++i) {
        addDoc(writer, L"bbb bbb bbb bbb");
    }

    TermPtr tc = newLucene<Term>(L"content", L"ccc");
    for (int32_t i = 0; i < 50; ++i) {
        addDoc(writer, L"ccc ccc ccc ccc");
    }

    // assure that we deal with a single segment
    writer->optimize();
    writer->close();

    IndexReaderPtr reader = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, indexDivisor);
    TermDocsPtr tdocs = reader->termDocs();

    // without optimization (assumption skipInterval == 16)

    // with next
    tdocs->seek(ta);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(0, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(1, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(0));
    EXPECT_EQ(2, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(4));
    EXPECT_EQ(4, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(9));
    EXPECT_EQ(9, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(10));

    // without next
    tdocs->seek(ta);
    EXPECT_TRUE(tdocs->skipTo(0));
    EXPECT_EQ(0, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(4));
    EXPECT_EQ(4, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(9));
    EXPECT_EQ(9, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(10));

    // exactly skipInterval documents and therefore with optimization

    // with next
    tdocs->seek(tb);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(10, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(11, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(12, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(15));
    EXPECT_EQ(15, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(24));
    EXPECT_EQ(24, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(25));
    EXPECT_EQ(25, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(26));

    // without next
    tdocs->seek(tb);
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(10, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(15));
    EXPECT_EQ(15, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(24));
    EXPECT_EQ(24, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(25));
    EXPECT_EQ(25, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(26));

    // much more than skipInterval documents and therefore with optimization

    // with next
    tdocs->seek(tc);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(26, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(27, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(28, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(40));
    EXPECT_EQ(40, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(57));
    EXPECT_EQ(57, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(74));
    EXPECT_EQ(74, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(75));
    EXPECT_EQ(75, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(76));

    // without next
    tdocs->seek(tc);
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(26, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(40));
    EXPECT_EQ(40, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(57));
    EXPECT_EQ(57, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(74));
    EXPECT_EQ(74, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(75));
    EXPECT_EQ(75, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(76));

    tdocs->close();
    reader->close();
    dir->close();
}
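// Illustrative only; the divisor values below are assumptions. Running the check
// with several term-index divisors exercises the same skipTo() logic while the
// in-memory term index is sampled at different granularities.
checkSkipTo(1); // full term index loaded
checkSkipTo(2); // every 2nd indexed term loaded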
void verifyIndex(const DirectoryPtr& dir) {
    IndexReaderPtr ir = IndexReader::open(dir, false);
    verifyIndex(ir);
    ir->close();
}