PrefixInBooleanQueryFixture() {
    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    for (int32_t i = 0; i < 5137; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    for (int32_t i = 5138; i < 11377; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
}
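// Editor's sketch (not from the original fixture): the kind of prefix-in-boolean query the index
// built above is presumably meant to exercise. It reuses the fixture's `directory` and FIELD
// members; the filler term L"notexistnames" is an illustrative assumption.
void searchPrefixInBooleanQuerySketch() {
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);
    BooleanQueryPtr query = newLucene<BooleanQuery>();
    // prefix clause that should match only the two "tangfulin" documents
    query->add(newLucene<PrefixQuery>(newLucene<Term>(FIELD, L"tang")), BooleanClause::SHOULD);
    // ordinary term clause sitting alongside the prefix clause
    query->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"notexistnames")), BooleanClause::SHOULD);
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs;
    // with the index built above, the prefix clause alone contributes exactly 2 hits
}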
ParallelTermEnumTestFixture() {
    RAMDirectoryPtr rd1 = newLucene<RAMDirectory>();
    IndexWriterPtr iw1 = newLucene<IndexWriter>(rd1, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"field1", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"field2", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"field4", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
    iw1->addDocument(doc);
    iw1->close();

    RAMDirectoryPtr rd2 = newLucene<RAMDirectory>();
    IndexWriterPtr iw2 = newLucene<IndexWriter>(rd2, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"field0", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"field1", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"field3", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
    iw2->addDocument(doc);
    iw2->close();

    this->ir1 = IndexReader::open(rd1, true);
    this->ir2 = IndexReader::open(rd2, true);
}
FieldCacheSanityCheckerTestFixture() {
    RAMDirectoryPtr dirA = newLucene<RAMDirectory>();
    RAMDirectoryPtr dirB = newLucene<RAMDirectory>();

    IndexWriterPtr wA = newLucene<IndexWriter>(dirA, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    IndexWriterPtr wB = newLucene<IndexWriter>(dirB, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    int64_t theLong = LLONG_MAX;
    double theDouble = DBL_MAX;
    uint8_t theByte = UCHAR_MAX;
    int32_t theInt = INT_MAX;

    for (int32_t i = 0; i < NUM_DOCS; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        if (i % 3 == 0)
            wA->addDocument(doc);
        else
            wB->addDocument(doc);
    }

    wA->close();
    wB->close();

    readerA = IndexReader::open(dirA, true);
    readerB = IndexReader::open(dirB, true);
    readerX = newLucene<MultiReader>(newCollection<IndexReaderPtr>(readerA, readerB));
}
// Run one indexer and 2 searchers against single index as stress test.
static void runTest(DirectoryPtr directory) {
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));

    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();
    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Establish a base index of 100 docs
    for (int32_t i = 0; i < 100; ++i) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((i - 1) % 7 == 0)
            writer->commit();
        writer->addDocument(d);
    }
    writer->commit();

    IndexReaderPtr r = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, r->numDocs());
    r->close();

    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(writer);
    threads[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(writer);
    threads[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    threads[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    threads[3] = searcherThread2;
    searcherThread2->start();

    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    writer->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}
virtual void doWork() {
    // Update all 100 docs
    for (int32_t i = 0; i < 100; ++i) {
        DocumentPtr d = newLucene<Document>();
        d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->updateDocument(newLucene<Term>(L"id", StringUtils::toString(i)), d);
    }
}
TEST_F(IndexWriterReaderTest, testUpdateDocument) {
    bool optimize = true;

    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    // create the index
    createIndexNoClose(!optimize, L"index1", writer);

    // get a reader
    IndexReaderPtr r1 = writer->getReader();
    EXPECT_TRUE(r1->isCurrent());

    String id10 = r1->document(10)->getField(L"id")->stringValue();

    DocumentPtr newDoc = r1->document(10);
    newDoc->removeField(L"id");
    newDoc->add(newLucene<Field>(L"id", StringUtils::toString(8000), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    writer->updateDocument(newLucene<Term>(L"id", id10), newDoc);
    EXPECT_TRUE(!r1->isCurrent());

    IndexReaderPtr r2 = writer->getReader();
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r2));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", StringUtils::toString(8000)), r2));

    r1->close();
    writer->close();
    EXPECT_TRUE(r2->isCurrent());

    IndexReaderPtr r3 = IndexReader::open(dir1, true);
    EXPECT_TRUE(r3->isCurrent());
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r3));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", StringUtils::toString(8000)), r3));

    writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_TRUE(r3->isCurrent());

    writer->close();
    EXPECT_TRUE(!r2->isCurrent());
    EXPECT_TRUE(!r3->isCurrent());

    r2->close();
    r3->close();

    dir1->close();
}
void runTest(DirectoryPtr directory, MergeSchedulerPtr merger) {
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(2);
    if (merger)
        writer->setMergeScheduler(merger);

    for (int32_t iter = 0; iter < NUM_ITER; ++iter) {
        int32_t iterFinal = iter;
        writer->setMergeFactor(1000);

        for (int32_t i = 0; i < 200; ++i) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
            d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
            writer->addDocument(d);
        }

        writer->setMergeFactor(4);

        Collection<LuceneThreadPtr> threads = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
        for (int32_t i = 0; i < NUM_THREADS; ++i) {
            int32_t iFinal = i;
            IndexWriterPtr writerFinal = writer;
            threads[i] = newLucene<OptimizeThread>(NUM_ITER2, iterFinal, iFinal, writer, writerFinal);
        }
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->start();
        for (int32_t i = 0; i < NUM_THREADS; ++i)
            threads[i]->join();

        int32_t expectedDocCount = (int32_t)((1 + iter) * (200 + 8 * NUM_ITER2 * (int32_t)(((double)NUM_THREADS / 2.0) * (double)(1 + NUM_THREADS))));

        BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

        writer->close();
        writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);

        IndexReaderPtr reader = IndexReader::open(directory, true);
        BOOST_CHECK(reader->isOptimized());
        BOOST_CHECK_EQUAL(expectedDocCount, reader->numDocs());
        reader->close();
    }
    writer->close();
}
static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) {
    StringStream sb;
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    doc->add(newLucene<Field>(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    sb << L"a" << n;
    doc->add(newLucene<Field>(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    sb << L" b" << n;
    for (int32_t i = 1; i < numFields; ++i) {
        doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
    }
    return doc;
}
DocumentPtr createDocument(int32_t n, int32_t numFields) {
    StringStream sb;
    DocumentPtr doc = newLucene<Document>();
    sb << L"a" << n;
    doc->add(newLucene<Field>(L"field1", sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
    doc->add(newLucene<Field>(L"fielda", sb.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
    doc->add(newLucene<Field>(L"fieldb", sb.str(), Field::STORE_YES, Field::INDEX_NO));
    sb << L" b" << n;
    for (int32_t i = 1; i < numFields; ++i) {
        doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sb.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
    }
    return doc;
}
DocumentPtr createDocument(const String& text, int64_t time) {
    DocumentPtr document = newLucene<Document>();

    // Add the text field.
    FieldPtr textField = newLucene<Field>(TEXT_FIELD, text, Field::STORE_YES, Field::INDEX_ANALYZED);
    document->add(textField);

    // Add the date/time field.
    String dateTimeString = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND);
    FieldPtr dateTimeField = newLucene<Field>(DATE_TIME_FIELD, dateTimeString, Field::STORE_YES, Field::INDEX_NOT_ANALYZED);
    document->add(dateTimeField);

    return document;
}
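// Editor's sketch (not from the original source): shows how a document built by the helper above
// could be indexed and then looked up by its exact second-resolution timestamp. Because the
// date/time field is indexed NOT_ANALYZED, the stored DateTools string can be matched with a
// plain TermQuery. The directory and analyzer choices here are assumptions for illustration.
void findByExactTimeSketch(int64_t time) {
    RAMDirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->addDocument(createDocument(L"some text", time));
    writer->close();

    // the same resolution used at index time must be used at query time
    String dateTimeString = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND);
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(dir, true);
    Collection<ScoreDocPtr> hits = searcher->search(newLucene<TermQuery>(newLucene<Term>(DATE_TIME_FIELD, dateTimeString)), FilterPtr(), 10)->scoreDocs;
    // hits.size() should be 1 for the single document added above
}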
/// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it.
void checkAgainstRAMDirectory() {
    StringStream fooField;
    StringStream termField;

    // add up to 250 terms to field "foo"
    int32_t fieldCount = random->nextInt(250) + 1;
    for (int32_t i = 0; i < fieldCount; ++i) {
        fooField << L" " << randomTerm();
    }

    // add up to 250 terms to field "term"
    int32_t termCount = random->nextInt(250) + 1;
    for (int32_t i = 0; i < termCount; ++i) {
        termField << L" " << randomTerm();
    }

    RAMDirectoryPtr ramdir = newLucene<RAMDirectory>();
    AnalyzerPtr analyzer = randomAnalyzer();
    IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
    DocumentPtr doc = newLucene<Document>();
    FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
    FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
    doc->add(field1);
    doc->add(field2);
    writer->addDocument(doc);
    writer->close();

    MemoryIndexPtr memory = newLucene<MemoryIndex>();
    memory->addField(L"foo", fooField.str(), analyzer);
    memory->addField(L"term", termField.str(), analyzer);
    checkAllQueries(memory, ramdir, analyzer);
}
void createIndex(int32_t numHits) {
    int32_t numDocs = 500;

    DirectoryPtr directory = newLucene<SeekCountingDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setUseCompoundFile(false);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        String content;
        if (i % (numDocs / numHits) == 0) {
            // add a document that matches the query "term1 term2"
            content = term1 + L" " + term2;
        } else if (i % 15 == 0) {
            // add a document that only contains term1
            content = term1 + L" " + term1;
        } else {
            // add a document that contains term2 but not term1
            content = term3 + L" " + term2;
        }
        doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }

    // make sure the index has only a single segment
    writer->optimize();
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
    searcher = newLucene<IndexSearcher>(reader);
}
TEST_F(LazyProxSkippingTest, testSeek) {
    DirectoryPtr directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < 10; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    IndexReaderPtr reader = IndexReader::open(directory, true);
    TermPositionsPtr tp = reader->termPositions();
    tp->seek(newLucene<Term>(field, L"b"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 1);
    }
    tp->seek(newLucene<Term>(field, L"a"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 0);
    }
}
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";

    RAMDirectoryPtr directory = newLucene<RAMDirectory>();
    Collection<String> values = newCollection<String>(L"1", L"2", L"3", L"4");

    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < values.size(); ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    BooleanQueryPtr booleanQuery1 = newLucene<BooleanQuery>();
    booleanQuery1->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    booleanQuery1->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    BooleanQueryPtr query = newLucene<BooleanQuery>();
    query->add(booleanQuery1, BooleanClause::MUST);
    query->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr indexSearcher = newLucene<IndexSearcher>(directory, true);
    Collection<ScoreDocPtr> hits = indexSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, hits.size());
}
/// One-time setup to initialise static members
void setup() {
    // set the theoretical maximum term count for an 8 bit precision step (see the NumericRangeQuery docs for the number)
    BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255);

    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthUNLIMITED);

    NumericFieldPtr field8 = newLucene<NumericField>(L"field8", 8, Field::STORE_YES, true);
    NumericFieldPtr field4 = newLucene<NumericField>(L"field4", 4, Field::STORE_YES, true);
    NumericFieldPtr field2 = newLucene<NumericField>(L"field2", 2, Field::STORE_YES, true);
    NumericFieldPtr fieldNoTrie = newLucene<NumericField>(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true);
    NumericFieldPtr ascfield8 = newLucene<NumericField>(L"ascfield8", 8, Field::STORE_NO, true);
    NumericFieldPtr ascfield4 = newLucene<NumericField>(L"ascfield4", 4, Field::STORE_NO, true);
    NumericFieldPtr ascfield2 = newLucene<NumericField>(L"ascfield2", 2, Field::STORE_NO, true);

    DocumentPtr doc = newLucene<Document>();

    // add fields that have a distance, to test general functionality
    doc->add(field8);
    doc->add(field4);
    doc->add(field2);
    doc->add(fieldNoTrie);

    // add ascending fields with a distance of 1, beginning at -noDocs/2, to test the correct splitting of range and inclusive/exclusive
    doc->add(ascfield8);
    doc->add(ascfield4);
    doc->add(ascfield2);

    // Add a series of noDocs docs with increasing int values
    for (int32_t l = 0; l < noDocs; ++l) {
        int32_t val = distance * l + startOffset;
        field8->setIntValue(val);
        field4->setIntValue(val);
        field2->setIntValue(val);
        fieldNoTrie->setIntValue(val);

        val = l - (noDocs / 2);
        ascfield8->setIntValue(val);
        ascfield4->setIntValue(val);
        ascfield2->setIntValue(val);
        writer->addDocument(doc);
    }

    writer->optimize();
    writer->close();
    searcher = newLucene<IndexSearcher>(directory, true);
}
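// Editor's sketch (not from the original source): the kind of range query this setup presumably
// feeds. It assumes NumericRangeQuery::newIntRange mirrors the Java Lucene factory; the bounds
// below are illustrative, and the precision step (8) must match the NumericField being queried.
void numericRangeSketch() {
    int32_t lower = startOffset;
    int32_t upper = startOffset + 100 * distance;
    NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(L"field8", 8, lower, upper, true, true);
    TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs);
    // every document whose value falls in [lower, upper] should be returned
}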
DocumentPtr makeDocument(const String& docText) {
    DocumentPtr doc = newLucene<Document>();
    FieldPtr f = newLucene<Field>(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED);
    f->setOmitNorms(true);
    doc->add(f);
    return doc;
}
void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create) {
    IndexWriterPtr iw = newLucene<IndexWriter>(ramDir1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED));
    iw->addDocument(doc);
    iw->close();
}
ExplanationsFixture::ExplanationsFixture() {
    qp = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, FIELD, newLucene<WhitespaceAnalyzer>());
    docFields = newCollection<String>(L"w1 w2 w3 w4 w5", L"w1 w3 w2 w3 zz", L"w1 xx w2 yy w3", L"w1 w3 xx w2 yy w3 zz");

    RAMDirectoryPtr directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < docFields.size(); ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(KEY, StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
        doc->add(newLucene<Field>(FIELD, docFields[i], Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();
    searcher = newLucene<IndexSearcher>(directory, true);
}
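// Editor's sketch (not from the original source): how a fixture like the one above is typically
// driven: parse a query with the fixture's QueryParser, run it, and ask the searcher to explain
// each hit's score. The query string is an illustrative assumption.
void explainSketch() {
    QueryPtr query = qp->parse(L"w1 w2");
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs;
    for (int32_t i = 0; i < hits.size(); ++i) {
        // the Explanation describes how the score of this hit was computed
        ExplanationPtr explanation = searcher->explain(query, hits[i]->doc);
    }
}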
static void addDocs2(IndexWriterPtr writer, int32_t numDocs) {
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
}
DocumentPtr newDoc() {
    DocumentPtr d = newLucene<Document>();
    double boost = nextNorm();
    for (int32_t i = 0; i < 10; ++i) {
        FieldPtr f = newLucene<Field>(L"f" + StringUtils::toString(i), L"v" + StringUtils::toString(i), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
        f->setBoost(boost);
        d->add(f);
    }
    return d;
}
virtual void run() {
    try {
        for (int32_t j = 0; j < numDocs; ++j) {
            DocumentPtr d = newLucene<Document>();
            d->add(newLucene<Field>(L"test", newLucene<PoolingPayloadTokenStream>(pool)));
            writer->addDocument(d);
        }
    } catch (LuceneException& e) {
        FAIL() << "Unexpected exception: " << e.getError();
    }
}
/// Tests whether the DocumentWriter and SegmentMerger correctly enable the payload bit in the FieldInfo
TEST_F(PayloadsTest, testPayloadFieldBit) {
    DirectoryPtr ram = newLucene<RAMDirectory>();
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr d = newLucene<Document>();

    // this field won't have any payloads
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));

    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // only add payload data for field f2
    ByteArray someData(ByteArray::newInstance(8));
    uint8_t input[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' };
    std::memcpy(someData.get(), input, 8);

    analyzer->setPayloadData(L"f2", 1, someData, 0, 1);
    writer->addDocument(d);

    // flush
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram);
    FieldInfosPtr fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(!fi->fieldInfo(L"f3")->storePayloads);
    reader->close();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    d = newLucene<Document>();
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // add payload data for field f2 and f3
    analyzer->setPayloadData(L"f2", someData, 0, 1);
    analyzer->setPayloadData(L"f3", someData, 0, 3);

    writer->addDocument(d);

    // force merge
    writer->optimize();

    // flush
    writer->close();

    reader = SegmentReader::getOnlySegmentReader(ram);
    fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f3")->storePayloads);
    reader->close();
}
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs) {
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setMergeFactor(2);
    writer->setMaxBufferedDocs(2);

    for (int32_t i = start; i < (start + numDocs); ++i) {
        DocumentPtr temp = newLucene<Document>();
        temp->add(newLucene<Field>(L"count", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        writer->addDocument(temp);
    }
    writer->close();
}
TermScorerFixture() {
    values = newCollection<String>(L"all", L"dogs dogs", L"like", L"playing", L"fetch", L"all");
    directory = newLucene<RAMDirectory>();

    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < values.size(); ++i) {
        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }
    writer->close();

    indexSearcher = newLucene<IndexSearcher>(directory, false);
    indexReader = indexSearcher->getIndexReader();
}
virtual void run() {
    try {
        for (int32_t j = 0; j < numIter; ++j) {
            writerFinal->optimize(false);
            for (int32_t k = 0; k < 17 * (1 + iFinal); ++k) {
                DocumentPtr d = newLucene<Document>();
                d->add(newLucene<Field>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                d->add(newLucene<Field>(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED));
                writer->addDocument(d);
            }
            for (int32_t k = 0; k < 9 * (1 + iFinal); ++k)
                writerFinal->deleteDocuments(newLucene<Term>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k)));
            writerFinal->optimize();
        }
    } catch (LuceneException& e) {
        BOOST_FAIL("Unexpected exception: " << e.getError());
    }
}
MultiThreadTermVectorsFixture() {
    directory = newLucene<RAMDirectory>();
    numDocs = 100;
    numThreads = 3;

    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        FieldablePtr fld = newLucene<Field>(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_YES);
        doc->add(fld);
        writer->addDocument(doc);
    }
    writer->close();
}
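// Editor's sketch (not from the original source): the read side that the fixture above sets up
// for its worker threads. Each thread would open a reader over the fixture's `directory` and
// fetch the stored term vector of the single "field" per document; the member names reused here
// come from the fixture, the rest is an assumption about how the threads consume the index.
void readTermVectorsSketch() {
    IndexReaderPtr reader = IndexReader::open(directory, true);
    for (int32_t docId = 0; docId < numDocs; ++docId) {
        // TERM_VECTOR_YES above means a term/frequency vector was stored for this field
        TermFreqVectorPtr vector = reader->getTermFreqVector(docId, L"field");
        Collection<String> terms = vector->getTerms();
        Collection<int32_t> freqs = vector->getTermFrequencies();
    }
    reader->close();
}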
void FuzzyIndex::appendFields( const Tomahawk::IndexData& data ) {
    try {
        DocumentPtr doc = newLucene<Document>();

        if ( !data.track.isEmpty() ) {
            doc->add(newLucene<Field>( L"fulltext", Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString(),
                                       Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
            doc->add(newLucene<Field>( L"track", Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString(),
                                       Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
            doc->add(newLucene<Field>( L"artist", Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString(),
                                       Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
            doc->add(newLucene<Field>( L"artistid", QString::number( data.artistId ).toStdWString(),
                                       Field::STORE_YES, Field::INDEX_NO ) );
            doc->add(newLucene<Field>( L"trackid", QString::number( data.id ).toStdWString(),
                                       Field::STORE_YES, Field::INDEX_NO ) );
        }
        else if ( !data.album.isEmpty() ) {
            doc->add(newLucene<Field>( L"album", Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString(),
                                       Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
            doc->add(newLucene<Field>( L"albumid", QString::number( data.id ).toStdWString(),
                                       Field::STORE_YES, Field::INDEX_NO ) );
        }
        else
            return;

        m_luceneWriter->addDocument( doc );
    }
    catch( LuceneException& error ) {
        tDebug() << "Caught Lucene error:" << error.what();
        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }
}
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;

    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();

    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();

        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();

    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();

    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));

    reader->close();
}
void add(const String& value, IndexWriterPtr iw) {
    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(FIELD_NAME, value, Field::STORE_YES, Field::INDEX_ANALYZED));
    iw->addDocument(d);
}
void addDoc(const IndexWriterPtr& writer, const String& value) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
}