void testRangeSplit(int32_t precisionStep) { RandomPtr rnd = newLucene<Random>(); String field = L"ascfield" + StringUtils::toString(precisionStep); // 50 random tests for (int32_t i = 0; i < 50; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); int32_t upper = (int32_t)(rnd->nextDouble() * noDocs - noDocs / 2.0); if (lower > upper) { std::swap(lower, upper); } // test inclusive range QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower + 1, tTopDocs->totalHits); // test exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(std::max(upper - lower - 1, (int32_t)0), tTopDocs->totalHits); // test left exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower, tTopDocs->totalHits); // test right exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false); tTopDocs = searcher->search(tq, 1); EXPECT_EQ(upper - lower, tTopDocs->totalHits); } }
void checkAllQueries(const MemoryIndexPtr& memory, const RAMDirectoryPtr& ramdir, const AnalyzerPtr& analyzer) { IndexSearcherPtr ram = newLucene<IndexSearcher>(ramdir); IndexSearcherPtr mem = memory->createSearcher(); QueryParserPtr qp = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, L"foo", analyzer); for (HashSet<String>::iterator query = queries.begin(); query != queries.end(); ++query) { TopDocsPtr ramDocs = ram->search(qp->parse(*query), 1); TopDocsPtr memDocs = mem->search(qp->parse(*query), 1); EXPECT_EQ(ramDocs->totalHits, memDocs->totalHits); } }
/// Test that values loaded for FieldScoreQuery are cached properly and consumes /// the proper RAM resources. void doTestCaching(const String& field, FieldScoreQuery::Type tp) { // prepare expected array types for comparison HashMap<FieldScoreQuery::Type, CollectionValue> expectedArrayTypes = HashMap<FieldScoreQuery::Type, CollectionValue>::newInstance(); expectedArrayTypes.put(FieldScoreQuery::BYTE, Collection<uint8_t>::newInstance()); expectedArrayTypes.put(FieldScoreQuery::INT, Collection<int32_t>::newInstance()); expectedArrayTypes.put(FieldScoreQuery::DOUBLE, Collection<double>::newInstance()); IndexSearcherPtr s = newLucene<IndexSearcher>(dir, true); Collection<CollectionValue> innerArray = Collection<CollectionValue>::newInstance(s->getIndexReader()->getSequentialSubReaders().size()); bool warned = false; // print warning once. for (int32_t i = 0; i < 10; ++i) { FieldScoreQueryPtr q = newLucene<FieldScoreQuery>(field, tp); Collection<ScoreDocPtr> h = s->search(q, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(N_DOCS, h.size()); Collection<IndexReaderPtr> readers = s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { if (i == 0) { innerArray[j] = q->valSrc->getValues(reader)->getInnerArray(); EXPECT_TRUE(VariantUtils::equalsType(innerArray[j], expectedArrayTypes.get(tp))); } else { EXPECT_TRUE(VariantUtils::equals(innerArray[j], q->valSrc->getValues(reader)->getInnerArray())); } } catch (UnsupportedOperationException&) { if (!warned) { // std::cout << "WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString()); warned = true; } } } } // verify new values are reloaded (not reused) for a new reader s = newLucene<IndexSearcher>(dir, true); FieldScoreQueryPtr q = newLucene<FieldScoreQuery>(field, tp); Collection<ScoreDocPtr> h = s->search(q, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(N_DOCS, h.size()); Collection<IndexReaderPtr> readers = 
s->getIndexReader()->getSequentialSubReaders(); for (int32_t j = 0; j < readers.size(); ++j) { IndexReaderPtr reader = readers[j]; try { EXPECT_TRUE(!equalCollectionValues(innerArray[j], q->valSrc->getValues(reader)->getInnerArray())); } catch (UnsupportedOperationException&) { if (!warned) { // std::cout << "WARNING: Cannot fully test values of " << StringUtils::toUTF8(q->toString()); warned = true; } } } }
/// test for both constant score and boolean query, the other tests only use the constant score mode void testRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (distance * 3 / 2) + startOffset; int32_t upper = lower + count * distance + (distance / 3); NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); NumericRangeFilterPtr f = NumericRangeFilter::newIntRange(field, precisionStep, lower, upper, true, true); int32_t lastTerms = 0; for (uint8_t i = 0; i < 3; ++i) { TopDocsPtr topDocs; int32_t terms; String type; q->clearTotalNumberOfTerms(); f->clearTotalNumberOfTerms(); switch (i) { case 0: type = L" (constant score filter rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 1: type = L" (constant score boolean rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 2: type = L" (filter)"; topDocs = searcher->search(newLucene<MatchAllDocsQuery>(), f, noDocs, Sort::INDEXORDER()); terms = f->getTotalNumberOfTerms(); break; default: return; } // std::cout << "Found " << terms << " distinct terms in range for field '" << field << "'" << type << "."; Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); EXPECT_EQ(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); EXPECT_EQ(StringUtils::toString(2 * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); EXPECT_EQ(StringUtils::toString((1 + count) * distance + startOffset), doc->get(field)); if (i > 0) { EXPECT_EQ(lastTerms, terms); } lastTerms = terms; } }
void testRandomTrieAndClassicRangeQuery(int32_t precisionStep) { RandomPtr rnd = newLucene<Random>(); String field = L"field" + StringUtils::toString(precisionStep); int32_t termCountT = 0; int32_t termCountC = 0; for (int32_t i = 0; i < 50; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) { std::swap(lower, upper); } // test inclusive range NumericRangeQueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TermRangeQueryPtr cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, true); TopDocsPtr tTopDocs = searcher->search(tq, 1); TopDocsPtr cTopDocs = searcher->search(cq, 1); EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, false); cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, false); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test left exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, false, true); cq = newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), false, true); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); // test right exclusive range tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, false); cq = 
newLucene<TermRangeQuery>(field, NumericUtils::intToPrefixCoded(lower), NumericUtils::intToPrefixCoded(upper), true, false); tTopDocs = searcher->search(tq, 1); cTopDocs = searcher->search(cq, 1); EXPECT_EQ(cTopDocs->totalHits, tTopDocs->totalHits); termCountT += tq->getTotalNumberOfTerms(); termCountC += cq->getTotalNumberOfTerms(); } if (precisionStep == INT_MAX) { EXPECT_EQ(termCountT, termCountC); } }
/// Searches for "Document" sorted on DATE_TIME_FIELD as a reversed STRING sort and checks
/// that the TEXT_FIELD values of the hits come back as Document 5, 4, 3, 2, 1.
TEST_F(DateSortTest, testReverseDateSort) {
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);

    SortPtr sort = newLucene<Sort>(newLucene<SortField>(DATE_TIME_FIELD, SortField::STRING, true));

    QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, newLucene<WhitespaceAnalyzer>());
    QueryPtr query = queryParser->parse(L"Document");

    // Execute the search and collect the text of every hit in result order.
    // The collection grows with the hits instead of being pre-sized to 5, so an unexpected
    // number of hits fails the comparison below rather than indexing past the end.
    Collection<String> actualOrder = Collection<String>::newInstance();
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000, sort)->scoreDocs;
    for (int32_t i = 0; i < hits.size(); ++i) {
        DocumentPtr document = searcher->doc(hits[i]->doc);
        actualOrder.add(document->get(TEXT_FIELD));
    }
    searcher->close();

    // Set up the expected order (ie. Document 5, 4, 3, 2, 1).
    Collection<String> expectedOrder = Collection<String>::newInstance(5);
    expectedOrder[0] = L"Document 5";
    expectedOrder[1] = L"Document 4";
    expectedOrder[2] = L"Document 3";
    expectedOrder[3] = L"Document 2";
    expectedOrder[4] = L"Document 1";

    EXPECT_TRUE(expectedOrder.equals(actualOrder));
}
void testSorting(int32_t precisionStep) { RandomPtr rnd = newLucene<Random>(); String field = L"field" + StringUtils::toString(precisionStep); // 10 random tests, the index order is ascending, so using a reverse sort field should return descending documents for (int32_t i = 0; i < 10; ++i) { int32_t lower = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; int32_t upper = (int32_t)(rnd->nextDouble() * noDocs * distance) + startOffset; if (lower > upper) { std::swap(lower, upper); } QueryPtr tq = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); TopDocsPtr topDocs = searcher->search(tq, FilterPtr(), noDocs, newLucene<Sort>(newLucene<SortField>(field, SortField::INT, true))); if (topDocs->totalHits == 0) { continue; } Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); int32_t last = StringUtils::toInt(searcher->doc(sd[0]->doc)->get(field)); for (int32_t j = 1; j < sd.size(); ++j) { int32_t act = StringUtils::toInt(searcher->doc(sd[j]->doc)->get(field)); EXPECT_TRUE(last > act); last = act; } } }
/// Indexes four single-term documents ("1".."4") in the "category" field and checks that
/// the query MUST(SHOULD "1", SHOULD "2") combined with MUST_NOT "9" returns two hits.
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";

    // build the index: one document per value, stored and not analyzed
    RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
    Collection<String> categories = newCollection<String>(L"1", L"2", L"3", L"4");
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t idx = 0; idx < categories.size(); ++idx) {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(FIELD, categories[idx], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        indexWriter->addDocument(document);
    }
    indexWriter->close();

    // inner disjunction: category "1" or category "2"
    BooleanQueryPtr eitherOneOrTwo = newLucene<BooleanQuery>();
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    // outer query: the disjunction is required, category "9" is prohibited
    BooleanQueryPtr combined = newLucene<BooleanQuery>();
    combined->add(eitherOneOrTwo, BooleanClause::MUST);
    combined->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr indexSearcher = newLucene<IndexSearcher>(ramDir, true);
    Collection<ScoreDocPtr> matches = indexSearcher->search(combined, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, matches.size());
}
/// Test that FieldScoreQuery returns docs with expected score. void doTestExactScore(const String& field, FieldScoreQuery::Type tp) { IndexSearcherPtr s = newLucene<IndexSearcher>(dir, true); QueryPtr q = newLucene<FieldScoreQuery>(field, tp); TopDocsPtr td = s->search(q, FilterPtr(), 1000); EXPECT_EQ(N_DOCS, td->totalHits); Collection<ScoreDocPtr> sd = td->scoreDocs; for (int32_t i = 0; i < sd.size(); ++i) { double score = sd[i]->score; String id = s->getIndexReader()->document(sd[i]->doc)->get(ID_FIELD); double expectedScore = expectedFieldScore(id); // "ID7" --> 7.0 EXPECT_NEAR(expectedScore, score, TEST_SCORE_TOLERANCE_DELTA); } }
/// Test that FieldScoreQuery returns docs in expected order. void doTestRank(const String& field, FieldScoreQuery::Type tp) { IndexSearcherPtr s = newLucene<IndexSearcher>(dir, true); QueryPtr q = newLucene<FieldScoreQuery>(field,tp); QueryUtils::check(q, s); Collection<ScoreDocPtr> h = s->search(q, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(N_DOCS, h.size()); String prevID = L"ID" + StringUtils::toString(N_DOCS + 1); // greater than all ids of docs in this test for (int32_t i = 0; i < h.size(); ++i) { String resID = s->doc(h[i]->doc)->get(ID_FIELD); EXPECT_TRUE(resID.compare(prevID) < 0); prevID = resID; } }
void testRightOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, lower, INT_MAX, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); EXPECT_EQ(noDocs - count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); EXPECT_EQ(StringUtils::toString(count * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); EXPECT_EQ(StringUtils::toString((noDocs - 1) * distance + startOffset), doc->get(field)); }
/// Indexes a single document into a fresh RAMDirectory, runs the phrase query with the given
/// slop and checks the total number of hits.
/// @param doc                document added to the temporary index.
/// @param query              phrase query to execute; its slop is overwritten with `slop`.
/// @param slop               slop value applied to `query` before searching.
/// @param expectedNumResults expected value of the search's total hit count.
/// @return the maxScore reported by the search.
double checkPhraseQuery(DocumentPtr doc, PhraseQueryPtr query, int32_t slop, int32_t expectedNumResults) {
    query->setSlop(slop);

    // build a one-document index
    RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
    WhitespaceAnalyzerPtr analyzer = newLucene<WhitespaceAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ramDir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->addDocument(doc);
    writer->close();

    IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
    TopDocsPtr td = searcher->search(query, FilterPtr(), 10);
    // GoogleTest assertion, matching the EXPECT_* macros used by the rest of the tests
    EXPECT_EQ(expectedNumResults, td->totalHits);

    searcher->close();
    ramDir->close();

    // td is still valid here: it was obtained before the searcher and directory were closed
    return td->maxScore;
}