TEST_F(LazyProxSkippingTest, testSeek) {
    static const int32_t docCount = 10;

    // Index ten identical documents, each containing the two terms "a" and "b".
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t d = 0; d < docCount; ++d) {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        indexWriter->addDocument(document);
    }
    indexWriter->close();

    IndexReaderPtr indexReader = IndexReader::open(dir, true);
    TermPositionsPtr positions = indexReader->termPositions();

    // "b" is the second token, so it sits at position 1 in every document.
    positions->seek(newLucene<Term>(field, L"b"));
    for (int32_t d = 0; d < docCount; ++d) {
        positions->next();
        EXPECT_EQ(d, positions->doc());
        EXPECT_EQ(1, positions->nextPosition());
    }

    // Seeking to "a" restarts the enumeration; "a" sits at position 0.
    positions->seek(newLucene<Term>(field, L"a"));
    for (int32_t d = 0; d < docCount; ++d) {
        positions->next();
        EXPECT_EQ(d, positions->doc());
        EXPECT_EQ(0, positions->nextPosition());
    }
}
/// Opens a TermPositions enumerator for each requested term and seeds the
/// priority queue that merges them into a single multi-term positions view.
MultipleTermPositions::MultipleTermPositions(IndexReaderPtr indexReader, Collection<TermPtr> terms) {
    Collection<TermPositionsPtr> perTermPositions(Collection<TermPositionsPtr>::newInstance());
    for (Collection<TermPtr>::iterator termIter = terms.begin(); termIter != terms.end(); ++termIter) {
        perTermPositions.add(indexReader->termPositions(*termIter));
    }
    posList = newLucene<IntQueue>();
    termPositionsQueue = newLucene<TermPositionsQueue>(perTermPositions);
    _doc = 0;
    _freq = 0;
}
ScorerPtr PhraseWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) {
    // A phrase with no terms can never match anything.
    if (query->terms.empty())
        return ScorerPtr();

    // Collect one positions enumerator per phrase term; a missing enumerator
    // means the phrase cannot match, so give up immediately.
    int32_t termCount = query->terms.size();
    Collection<TermPositionsPtr> positions(Collection<TermPositionsPtr>::newInstance(termCount));
    for (int32_t idx = 0; idx < termCount; ++idx) {
        TermPositionsPtr termPositions(reader->termPositions(query->terms[idx]));
        if (!termPositions)
            return ScorerPtr();
        positions[idx] = termPositions;
    }

    // Zero slop allows the cheaper exact-phrase scorer.
    if (query->slop == 0) {
        return newLucene<ExactPhraseScorer>(shared_from_this(), positions, query->getPositions(), similarity, reader->norms(query->field));
    }
    return newLucene<SloppyPhraseScorer>(shared_from_this(), positions, query->getPositions(), similarity, query->slop, reader->norms(query->field));
}
/// Builds an index with payloads in the given Directory and performs different /// tests to verify the payload encoding static void encodingTest(const DirectoryPtr& dir) { PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>(); IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); // should be in sync with value in TermInfosWriter int32_t skipInterval = 16; int32_t numTerms = 5; String fieldName = L"f1"; int32_t numDocs = skipInterval + 1; // create content for the test documents with just a few terms Collection<TermPtr> terms = generateTerms(fieldName, numTerms); StringStream sb; for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) { sb << (*term)->text() << L" "; } String content = sb.str(); int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2; ByteArray payloadData = generateRandomData(payloadDataLength); DocumentPtr d = newLucene<Document>(); d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED)); // add the same document multiple times to have the same payload lengths for all // occurrences within two consecutive skip intervals int32_t offset = 0; for (int32_t i = 0; i < 2 * numDocs; ++i) { analyzer->setPayloadData(fieldName, payloadData, offset, 1); offset += numTerms; writer->addDocument(d); } // make sure we create more than one segment to test merging writer->commit(); for (int32_t i = 0; i < numDocs; ++i) { analyzer->setPayloadData(fieldName, payloadData, offset, i); offset += i * numTerms; writer->addDocument(d); } writer->optimize(); // flush writer->close(); // Verify the index IndexReaderPtr reader = IndexReader::open(dir, true); ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength)); offset = 0; Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms); for (int32_t i = 0; i < numTerms; ++i) { tps[i] = reader->termPositions(terms[i]); } while 
(tps[0]->next()) { for (int32_t i = 1; i < numTerms; ++i) { tps[i]->next(); } int32_t freq = tps[0]->freq(); for (int32_t i = 0; i < freq; ++i) { for (int32_t j = 0; j < numTerms; ++j) { tps[j]->nextPosition(); tps[j]->getPayload(verifyPayloadData, offset); offset += tps[j]->getPayloadLength(); } } } for (int32_t i = 0; i < numTerms; ++i) { tps[i]->close(); } EXPECT_TRUE(payloadData.equals(verifyPayloadData)); // test lazy skipping TermPositionsPtr tp = reader->termPositions(terms[0]); tp->next(); tp->nextPosition(); // now we don't read this payload tp->nextPosition(); EXPECT_EQ(1, tp->getPayloadLength()); ByteArray payload = tp->getPayload(ByteArray(), 0); EXPECT_EQ(payload[0], payloadData[numTerms]); tp->nextPosition(); // we don't read this payload and skip to a different document tp->skipTo(5); tp->nextPosition(); EXPECT_EQ(1, tp->getPayloadLength()); payload = tp->getPayload(ByteArray(), 0); EXPECT_EQ(payload[0], payloadData[5 * numTerms]); // Test different lengths at skip points tp->seek(terms[1]); tp->next(); tp->nextPosition(); EXPECT_EQ(1, tp->getPayloadLength()); tp->skipTo(skipInterval - 1); tp->nextPosition(); EXPECT_EQ(1, tp->getPayloadLength()); tp->skipTo(2 * skipInterval - 1); tp->nextPosition(); EXPECT_EQ(1, tp->getPayloadLength()); tp->skipTo(3 * skipInterval - 1); tp->nextPosition(); EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength()); // Test multiple call of getPayload() tp->getPayload(ByteArray(), 0); // it is forbidden to call getPayload() more than once without calling nextPosition() try { tp->getPayload(ByteArray(), 0); } catch (IOException& e) { EXPECT_TRUE(check_exception(LuceneException::IO)(e)); } reader->close(); // test long payload analyzer = newLucene<PayloadAnalyzer>(); writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED); String singleTerm = L"lucene"; d = newLucene<Document>(); d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED)); // add a 
payload whose length is greater than the buffer size of BufferedIndexOutput payloadData = generateRandomData(2000); analyzer->setPayloadData(fieldName, payloadData, 100, 1500); writer->addDocument(d); writer->optimize(); // flush writer->close(); reader = IndexReader::open(dir, true); tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm)); tp->next(); tp->nextPosition(); verifyPayloadData.resize(tp->getPayloadLength()); tp->getPayload(verifyPayloadData, 0); ByteArray portion(ByteArray::newInstance(1500)); MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500); EXPECT_TRUE(portion.equals(verifyPayloadData)); reader->close(); }