PrefixInBooleanQueryFixture() {
    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 5137; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    for (int32_t i = 5138; i < 11377; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
}
FieldCacheSanityCheckerTestFixture() {
    RAMDirectoryPtr dirA = newLucene<RAMDirectory>();
    RAMDirectoryPtr dirB = newLucene<RAMDirectory>();

    IndexWriterPtr wA = newLucene<IndexWriter>(dirA, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    IndexWriterPtr wB = newLucene<IndexWriter>(dirB, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    int64_t theLong = LLONG_MAX;
    double theDouble = DBL_MAX;
    uint8_t theByte = UCHAR_MAX;
    int32_t theInt = INT_MAX;

    for (int32_t i = 0; i < NUM_DOCS; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        if (i % 3 == 0) {
            wA->addDocument(doc);
        } else {
            wB->addDocument(doc);
        }
    }
    wA->close();
    wB->close();

    readerA = IndexReader::open(dirA, true);
    readerB = IndexReader::open(dirB, true);
    readerX = newLucene<MultiReader>(newCollection<IndexReaderPtr>(readerA, readerB));
}
/// Tests whether the DocumentWriter and SegmentMerger correctly enable the payload bit in the FieldInfo
TEST_F(PayloadsTest, testPayloadFieldBit) {
    DirectoryPtr ram = newLucene<RAMDirectory>();
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr d = newLucene<Document>();

    // this field won't have any payloads
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // only add payload data for field f2
    ByteArray someData(ByteArray::newInstance(8));
    uint8_t input[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' };
    std::memcpy(someData.get(), input, 8);
    analyzer->setPayloadData(L"f2", 1, someData, 0, 1);

    writer->addDocument(d);
    // flush
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram);
    FieldInfosPtr fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(!fi->fieldInfo(L"f3")->storePayloads);
    reader->close();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    d = newLucene<Document>();
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // add payload data for field f2 and f3
    analyzer->setPayloadData(L"f2", someData, 0, 1);
    analyzer->setPayloadData(L"f3", someData, 0, 3);

    writer->addDocument(d);
    // force merge
    writer->optimize();
    // flush
    writer->close();

    reader = SegmentReader::getOnlySegmentReader(ram);
    fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f3")->storePayloads);
    reader->close();
}
/// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it.
void checkAgainstRAMDirectory() {
    StringStream fooField;
    StringStream termField;

    // add up to 250 terms to field "foo"
    int32_t fieldCount = random->nextInt(250) + 1;
    for (int32_t i = 0; i < fieldCount; ++i) {
        fooField << L" " << randomTerm();
    }

    // add up to 250 terms to field "term"
    int32_t termCount = random->nextInt(250) + 1;
    for (int32_t i = 0; i < termCount; ++i) {
        termField << L" " << randomTerm();
    }

    RAMDirectoryPtr ramdir = newLucene<RAMDirectory>();
    AnalyzerPtr analyzer = randomAnalyzer();
    IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
    DocumentPtr doc = newLucene<Document>();
    FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
    FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
    doc->add(field1);
    doc->add(field2);
    writer->addDocument(doc);
    writer->close();

    MemoryIndexPtr memory = newLucene<MemoryIndex>();
    memory->addField(L"foo", fooField.str(), analyzer);
    memory->addField(L"term", termField.str(), analyzer);
    checkAllQueries(memory, ramdir, analyzer);
}
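// checkAgainstRAMDirectory() above calls randomTerm() and randomAnalyzer(), which are not shown here.
// The following is only a minimal sketch of what such helpers could look like, assuming the shared
// RandomPtr named "random" used above and the stock Lucene++ analyzers; the term list and the
// analyzer choices are illustrative assumptions, not the original helpers.
String randomTerm() {
    // mostly short fixed terms, occasionally a numeric-looking one
    if (random->nextInt(10) == 0) {
        return StringUtils::toString(random->nextInt(1000));
    }
    static const wchar_t* terms[] = { L"lucene", L"search", L"memory", L"index", L"term", L"foo" };
    return terms[random->nextInt(6)];
}

AnalyzerPtr randomAnalyzer() {
    // pick one of the standard analyzers at random
    switch (random->nextInt(3)) {
        case 0:
            return newLucene<SimpleAnalyzer>();
        case 1:
            return newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
        default:
            return newLucene<WhitespaceAnalyzer>();
    }
}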
void index(const tstring& sDir) {
    IndexWriterPtr pIndexWriter = m_pIndex->acquireWriter();
    DirectoryIterator di(sDir, false);
    while (di.hasNext()) {
        const File& f = di.next();
        if (f.isFile()) {
            BinaryFile bf;
            bf.open(f.getPath().c_str(), BinaryFile::READ);
            if (bf.isFileOpen()) {
                size_t nRead = (size_t)bf.getLength();
                if (nRead > 0) {
                    DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
                    pDoc->addField(0, f.getPath().c_str());

                    char* buf = new char[nRead + 1];
                    bf.read(buf, nRead);
                    buf[nRead] = 0;
                    pDoc->addField(1, buf, nRead, false);
                    delete[] buf;

                    pIndexWriter->addDocument(pDoc);
                }
            }
        }
    }
    docPool.commit();
    pIndexWriter->close();
}
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";
    RAMDirectoryPtr directory = newLucene<RAMDirectory>();
    Collection<String> values = newCollection<String>(L"1", L"2", L"3", L"4");

    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < values.size(); ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    BooleanQueryPtr booleanQuery1 = newLucene<BooleanQuery>();
    booleanQuery1->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    booleanQuery1->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    BooleanQueryPtr query = newLucene<BooleanQuery>();
    query->add(booleanQuery1, BooleanClause::MUST);
    query->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr indexSearcher = newLucene<IndexSearcher>(directory, true);
    Collection<ScoreDocPtr> hits = indexSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, hits.size());
}
void DateTimeIndexTestCase::buildDateTimeIndex(const string& sDocs) {
    try {
        DocumentSchema schema;
        schema.addField("DateTime1", "DATETIME_I", true);

        Index index;
        index.open(getIndexPath(), Index::WRITE, &schema);
        IndexWriterPtr pIndexWriter = index.acquireWriter();

        StringTokenizer st(sDocs, ";", StringTokenizer::TOKEN_TRIM | StringTokenizer::TOKEN_IGNORE_EMPTY);
        for (StringTokenizer::Iterator it = st.begin(); it != st.end(); ++it) {
            DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
            pDoc->addField(0, (*it).c_str());
            pIndexWriter->addDocument(pDoc);
        }
        pIndexWriter->close();
    } catch (const FirteXException&) {
        CPPUNIT_ASSERT(false);
    }
}
void createIndex(int32_t numHits) {
    int32_t numDocs = 500;

    DirectoryPtr directory = newLucene<SeekCountingDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setUseCompoundFile(false);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        String content;
        if (i % (numDocs / numHits) == 0) {
            // add a document that matches the query "term1 term2"
            content = term1 + L" " + term2;
        } else if (i % 15 == 0) {
            // add a document that only contains term1
            content = term1 + L" " + term1;
        } else {
            // add a document that contains term2 but not term 1
            content = term3 + L" " + term2;
        }
        doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }

    // make sure the index has only a single segment
    writer->optimize();
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
    searcher = newLucene<IndexSearcher>(reader);
}
TEST_F(LazyProxSkippingTest, testSeek) {
    DirectoryPtr directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 10; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    IndexReaderPtr reader = IndexReader::open(directory, true);
    TermPositionsPtr tp = reader->termPositions();
    tp->seek(newLucene<Term>(field, L"b"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 1);
    }
    tp->seek(newLucene<Term>(field, L"a"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 0);
    }
}
void createIndex(const DirectoryPtr& dir, bool multiSegment) {
    IndexWriter::unlock(dir);
    IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    w->setMergePolicy(newLucene<LogDocMergePolicy>(w));
    for (int32_t i = 0; i < 100; ++i) {
        w->addDocument(createDocument(i, 4));
        if (multiSegment && (i % 10) == 0) {
            w->commit();
        }
    }
    if (!multiSegment) {
        w->optimize();
    }
    w->close();

    IndexReaderPtr r = IndexReader::open(dir, false);
    if (multiSegment) {
        EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
    } else {
        EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
    }
    r->close();
}
static void createIndexNoClose(bool multiSegment, const String& indexName, const IndexWriterPtr& w) {
    for (int32_t i = 0; i < 100; ++i) {
        w->addDocument(createDocument(i, indexName, 4));
    }
    if (!multiSegment) {
        w->optimize();
    }
}
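// createDocument() used above (and in createIndex() and AddDirectoriesThreads elsewhere in this
// section) is an external helper that is not shown. Below is only a rough sketch of what it
// plausibly builds: an "id" key field plus numFields analyzed text fields. Field names and
// contents are assumptions for illustration, not the original helper; the two-argument form
// seen in createIndex() presumably just omits the index name.
static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    doc->add(newLucene<Field>(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    StringStream sb;
    sb << L"a" << n;
    for (int32_t i = 0; i < numFields; ++i) {
        doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
    }
    return doc;
}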
void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create) {
    IndexWriterPtr iw = newLucene<IndexWriter>(ramDir1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED));
    iw->addDocument(doc);
    iw->close();
}
static void addDocs2(IndexWriterPtr writer, int32_t numDocs) {
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
}
virtual void run() {
    try {
        for (int32_t j = 0; j < numDocs; ++j) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"test", newLucene<PoolingPayloadTokenStream>(pool)));
            writer->addDocument(d);
        }
    } catch (LuceneException& e) {
        FAIL() << "Unexpected exception: " << e.getError();
    }
}
void addDocs(const DirectoryPtr& dir, int32_t ndocs, bool compound) {
    IndexWriterPtr iw = newLucene<IndexWriter>(dir, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->setSimilarity(similarityOne);
    iw->setUseCompoundFile(compound);
    for (int32_t i = 0; i < ndocs; ++i) {
        iw->addDocument(newDoc());
    }
    iw->close();
}
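// addDocs() relies on an external newDoc() helper (and on the anlzr and similarityOne members).
// A minimal sketch, under the assumption that each document carries a handful of small
// untokenized fields; this is purely illustrative, not the original helper.
DocumentPtr newDoc() {
    DocumentPtr d = newLucene<Document>();
    for (int32_t i = 0; i < 10; ++i) {
        d->add(newLucene<Field>(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
    }
    return d;
}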
// Run one indexer and 2 searchers against single index as stress test.
static void runTest(DirectoryPtr directory) {
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));

    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();
    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Establish a base index of 100 docs
    for (int32_t i = 0; i < 100; ++i) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((i - 1) % 7 == 0) {
            writer->commit();
        }
        writer->addDocument(d);
    }
    writer->commit();

    IndexReaderPtr r = IndexReader::open(directory, true);
    EXPECT_EQ(100, r->numDocs());
    r->close();

    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();

    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    writer->close();

    EXPECT_TRUE(!indexerThread1->failed); // hit unexpected exception in indexer1
    EXPECT_TRUE(!indexerThread2->failed); // hit unexpected exception in indexer2
    EXPECT_TRUE(!searcherThread1->failed); // hit unexpected exception in search1
    EXPECT_TRUE(!searcherThread2->failed); // hit unexpected exception in search2
}
TEST_F(IndexWriterReaderTest, testUpdateDocument) {
    bool optimize = true;

    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    // create the index
    createIndexNoClose(!optimize, L"index1", writer);

    // get a reader
    IndexReaderPtr r1 = writer->getReader();
    EXPECT_TRUE(r1->isCurrent());

    String id10 = r1->document(10)->getField(L"id")->stringValue();

    DocumentPtr newDoc = r1->document(10);
    newDoc->removeField(L"id");
    newDoc->add(newLucene<Field>(L"id", StringUtils::toString(8000), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    writer->updateDocument(newLucene<Term>(L"id", id10), newDoc);
    EXPECT_TRUE(!r1->isCurrent());

    IndexReaderPtr r2 = writer->getReader();
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r2));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", StringUtils::toString(8000)), r2));

    r1->close();
    writer->close();
    EXPECT_TRUE(r2->isCurrent());

    IndexReaderPtr r3 = IndexReader::open(dir1, true);
    EXPECT_TRUE(r3->isCurrent());
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r3));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", StringUtils::toString(8000)), r3));

    writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_TRUE(r3->isCurrent());

    writer->close();
    EXPECT_TRUE(!r2->isCurrent());
    EXPECT_TRUE(!r3->isCurrent());

    r2->close();
    r3->close();

    dir1->close();
}
DateSortTest() {
    // Create an index writer.
    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // oldest doc:
    // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007
    writer->addDocument(createDocument(L"Document 1", 1192001122000LL));
    // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007
    writer->addDocument(createDocument(L"Document 2", 1192001126000LL));
    // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007
    writer->addDocument(createDocument(L"Document 3", 1192101133000LL));
    // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007
    writer->addDocument(createDocument(L"Document 4", 1192104129000LL));
    // latest doc:
    // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007
    writer->addDocument(createDocument(L"Document 5", 1192209943000LL));

    writer->optimize();
    writer->close();
}
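// The fixture above uses a createDocument(text, millis) helper that is not shown. A plausible
// sketch, assuming the text is stored as-is and the timestamp is indexed as a sortable string
// via Lucene++'s DateTools at second resolution; the field names and the use of DateTools are
// assumptions for illustration, not the original helper.
static DocumentPtr createDocument(const String& text, int64_t time) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"text", text, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    String dateTime = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND);
    doc->add(newLucene<Field>(L"dateTime", dateTime, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    return doc;
}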
void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) {
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(2);
    if (merger) {
        writer->setMergeScheduler(merger);
    }

    for (int32_t iter = 0; iter < NUM_ITER; ++iter) {
        int32_t iterFinal = iter;
        writer->setMergeFactor(1000);

        for (int32_t i = 0; i < 200; ++i) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
            writer->addDocument(d);
        }

        writer->setMergeFactor(4);

        Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            int32_t iFinal = i;
            IndexWriterPtr writerFinal = writer;
            threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            threads[i]->start();
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            threads[i]->join();
        }

        int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));

        EXPECT_EQ(expectedDocCount, writer->maxDoc());

        writer->close();
        writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);

        IndexReaderPtr reader = IndexReader::open(directory, true);
        EXPECT_TRUE(reader->isOptimized());
        EXPECT_EQ(expectedDocCount, reader->numDocs());
        reader->close();
    }
    writer->close();
}
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs) {
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergeFactor(2);
    writer->setMaxBufferedDocs(2);
    for (int32_t i = start; i < (start + numDocs); ++i) {
        DocumentPtr temp = newLucene<Document>();
        temp->add(newLucene<Field>(L"count", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(temp);
    }
    writer->close();
}
TermScorerFixture() {
    values = newCollection<String>(L"all", L"dogs dogs", L"like", L"playing", L"fetch", L"all");
    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < values.size(); ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
    indexSearcher = newLucene<IndexSearcher>(directory, false);
    indexReader = indexSearcher->getIndexReader();
}
MultiThreadTermVectorsFixture() {
    directory = newLucene<RAMDirectory>();
    numDocs = 100;
    numThreads = 3;
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        FieldablePtr fld = newLucene<Field>(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_YES);
        doc->add(fld);
        writer->addDocument(doc);
    }
    writer->close();
}
/// One-time setup to initialise static members
void setup() {
    // set the theoretical maximum term count for 8bit (see docs for the number)
    BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255);

    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthUNLIMITED);

    NumericFieldPtr field8 = newLucene<NumericField>(L"field8", 8, Field::STORE_YES, true);
    NumericFieldPtr field4 = newLucene<NumericField>(L"field4", 4, Field::STORE_YES, true);
    NumericFieldPtr field2 = newLucene<NumericField>(L"field2", 2, Field::STORE_YES, true);
    NumericFieldPtr fieldNoTrie = newLucene<NumericField>(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true);
    NumericFieldPtr ascfield8 = newLucene<NumericField>(L"ascfield8", 8, Field::STORE_NO, true);
    NumericFieldPtr ascfield4 = newLucene<NumericField>(L"ascfield4", 4, Field::STORE_NO, true);
    NumericFieldPtr ascfield2 = newLucene<NumericField>(L"ascfield2", 2, Field::STORE_NO, true);

    DocumentPtr doc = newLucene<Document>();

    // add fields, that have a distance to test general functionality
    doc->add(field8);
    doc->add(field4);
    doc->add(field2);
    doc->add(fieldNoTrie);

    // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct
    // splitting of range and inclusive/exclusive
    doc->add(ascfield8);
    doc->add(ascfield4);
    doc->add(ascfield2);

    // Add a series of noDocs docs with increasing int values
    for (int32_t l = 0; l < noDocs; ++l) {
        int32_t val = distance * l + startOffset;
        field8->setIntValue(val);
        field4->setIntValue(val);
        field2->setIntValue(val);
        fieldNoTrie->setIntValue(val);

        val = l - (noDocs / 2);
        ascfield8->setIntValue(val);
        ascfield4->setIntValue(val);
        ascfield2->setIntValue(val);
        writer->addDocument(doc);
    }

    writer->optimize();
    writer->close();
    searcher = newLucene<IndexSearcher>(directory, true);
}
double checkPhraseQuery(DocumentPtr doc, PhraseQueryPtr query, int32_t slop, int32_t expectedNumResults) {
    query->setSlop(slop);

    RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
    WhitespaceAnalyzerPtr analyzer = newLucene<WhitespaceAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ramDir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->addDocument(doc);
    writer->close();

    IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
    TopDocsPtr td = searcher->search(query, FilterPtr(), 10);
    EXPECT_EQ(expectedNumResults, td->totalHits);

    searcher->close();
    ramDir->close();

    return td->maxScore;
}
AddDirectoriesThreads(int32_t numDirs, const IndexWriterPtr& mainWriter) {
    this->numDirs = numDirs;
    this->mainWriter = mainWriter;
    threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
    failures = Collection<LuceneException>::newInstance();
    didClose = false;
    count = newLucene<HeavyAtomicInt>(0);
    numAddIndexesNoOptimize = newLucene<HeavyAtomicInt>(0);

    addDir = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(addDir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    writer->setMaxBufferedDocs(2);
    for (int32_t i = 0; i < NUM_INIT_DOCS; ++i) {
        DocumentPtr doc = createDocument(i, L"addindex", 4);
        writer->addDocument(doc);
    }
    writer->close();

    readers = Collection<IndexReaderPtr>::newInstance(numDirs);
    for (int32_t i = 0; i < numDirs; ++i) {
        readers[i] = IndexReader::open(addDir, false);
    }
}
virtual void run() {
    try {
        for (int32_t j = 0; j < numIter; ++j) {
            writerFinal->optimize(false);
            for (int32_t k = 0; k < 17 * (1 + iFinal); ++k) {
                DocumentPtr d = newLucene<Document>();
                d->add(newLucene<Field>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                d->add(newLucene<Field>(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED));
                writer->addDocument(d);
            }
            for (int32_t k = 0; k < 9 * (1 + iFinal); ++k) {
                writerFinal->deleteDocuments(newLucene<Term>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k)));
            }
            writerFinal->optimize();
        }
    } catch (LuceneException& e) {
        FAIL() << "Unexpected exception: " << e.getError();
    }
}
TEST_F(SimpleExplanationsOfNonMatchesTest, testTermQueryMultiSearcherExplain) {
    // creating two directories for indices
    DirectoryPtr indexStoreA = newLucene<MockRAMDirectory>();
    DirectoryPtr indexStoreB = newLucene<MockRAMDirectory>();

    DocumentPtr lDoc = newLucene<Document>();
    lDoc->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));
    DocumentPtr lDoc2 = newLucene<Document>();
    lDoc2->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));
    DocumentPtr lDoc3 = newLucene<Document>();
    lDoc3->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));

    IndexWriterPtr writerA = newLucene<IndexWriter>(indexStoreA, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    IndexWriterPtr writerB = newLucene<IndexWriter>(indexStoreB, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);

    writerA->addDocument(lDoc);
    writerA->addDocument(lDoc2);
    writerA->optimize();
    writerA->close();

    writerB->addDocument(lDoc3);
    writerB->close();

    QueryParserPtr parser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT));
    QueryPtr query = parser->parse(L"handle:1");

    Collection<SearchablePtr> searchers = newCollection<SearchablePtr>(
        newLucene<IndexSearcher>(indexStoreB, true),
        newLucene<IndexSearcher>(indexStoreA, true)
    );
    SearcherPtr mSearcher = newLucene<MultiSearcher>(searchers);
    Collection<ScoreDocPtr> hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    ExplanationPtr explain = mSearcher->explain(query, hits[0]->doc);
    String exp = explain->toString();
    EXPECT_TRUE(exp.find(L"maxDocs=3") != String::npos);
    EXPECT_TRUE(exp.find(L"docFreq=3") != String::npos);

    query = parser->parse(L"handle:\"1 2\"");
    hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    explain = mSearcher->explain(query, hits[0]->doc);
    exp = explain->toString();
    EXPECT_TRUE(exp.find(L"1=3") != String::npos);
    EXPECT_TRUE(exp.find(L"2=3") != String::npos);

    query = newLucene<SpanNearQuery>(newCollection<SpanQueryPtr>(newLucene<SpanTermQuery>(newLucene<Term>(L"handle", L"1")), newLucene<SpanTermQuery>(newLucene<Term>(L"handle", L"2"))), 0, true);
    hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    explain = mSearcher->explain(query, hits[0]->doc);
    exp = explain->toString();
    EXPECT_TRUE(exp.find(L"1=3") != String::npos);
    EXPECT_TRUE(exp.find(L"2=3") != String::npos);

    mSearcher->close();
}
void addDoc(const IndexWriterPtr& writer, const String& value) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
}
void add(const String& value, IndexWriterPtr iw) {
    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(FIELD_NAME, value, Field::STORE_YES, Field::INDEX_ANALYZED));
    iw->addDocument(d);
}
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;

    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();
        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();
    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));
    reader->close();
}
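// encodingTest() above depends on two external helpers, generateTerms() and generateRandomData(),
// which are not shown. A minimal sketch of their presumed behaviour (a list of distinct terms for
// one field, and a buffer of arbitrary byte values) follows; the term texts and the random source
// are assumptions for illustration, not the original helpers.
static Collection<TermPtr> generateTerms(const String& fieldName, int32_t n) {
    Collection<TermPtr> terms = Collection<TermPtr>::newInstance(n);
    for (int32_t i = 0; i < n; ++i) {
        terms[i] = newLucene<Term>(fieldName, L"t" + StringUtils::toString(i));
    }
    return terms;
}

static ByteArray generateRandomData(int32_t n) {
    ByteArray data(ByteArray::newInstance(n));
    RandomPtr rnd = newLucene<Random>();
    for (int32_t i = 0; i < n; ++i) {
        data[i] = (uint8_t)rnd->nextInt(256);
    }
    return data;
}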