static void verifyDocFreq(const DirectoryPtr& dir) { IndexReaderPtr reader = IndexReader::open(dir, true); // create enumeration of all terms TermEnumPtr termEnum = reader->terms(); // go to the first term (aaa) termEnum->next(); // assert that term is 'aaa' EXPECT_EQ(L"aaa", termEnum->term()->text()); EXPECT_EQ(200, termEnum->docFreq()); // go to the second term (bbb) termEnum->next(); // assert that term is 'bbb' EXPECT_EQ(L"bbb", termEnum->term()->text()); EXPECT_EQ(100, termEnum->docFreq()); termEnum->close(); // create enumeration of terms after term 'aaa', including 'aaa' termEnum = reader->terms(newLucene<Term>(L"content", L"aaa")); // assert that term is 'aaa' EXPECT_EQ(L"aaa", termEnum->term()->text()); EXPECT_EQ(200, termEnum->docFreq()); // go to term 'bbb' termEnum->next(); // assert that term is 'bbb' EXPECT_EQ(L"bbb", termEnum->term()->text()); EXPECT_EQ(100, termEnum->docFreq()); termEnum->close(); }
// Remembers the prefix term and installs, via setEnum(), the enumeration that
// reader->terms() returns for a term with the prefix's field and text.
PrefixTermEnum::PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix) {
    this->prefix = prefix;
    this->_endEnum = false;

    // Build a fresh Term carrying the same field and text as the prefix and use
    // it as the starting point of the underlying enumeration.
    TermPtr startTerm(newLucene<Term>(prefix->field(), prefix->text()));
    setEnum(reader->terms(startTerm));
}
// Builds a Collection<String> with reader->maxDoc() slots. For every term of the
// field named by key->field, the term text is written into the slot of each
// document that termDocs reports for that term; a later term overwrites an
// earlier one for the same document.
boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    String field(key->field);
    Collection<String> values(Collection<String>::newInstance(reader->maxDoc()));
    TermDocsPtr docs(reader->termDocs());
    TermEnumPtr terms(reader->terms(newLucene<Term>(field)));

    // Holds a LuceneException caught during the scan so that both enumerations
    // can be closed before it is passed on through throwException().
    LuceneException deferred;
    try {
        for (bool more = true; more; more = terms->next()) {
            TermPtr current(terms->term());
            // stop when there is no current term or it belongs to another field
            if (!current || current->field() != field) {
                break;
            }
            String text(current->text());
            docs->seek(terms);
            while (docs->next()) {
                values[docs->doc()] = text;
            }
        }
    } catch (LuceneException& e) {
        deferred = e;
    }

    docs->close();
    terms->close();
    deferred.throwException();
    return values;
}
// Builds a StringIndex for the field named by key->field:
//  - a per-document array (reader->maxDoc() entries) holding the ordinal of the
//    term written for that document, and
//  - a lookup array mapping each ordinal to its term text.
boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    String field(key->field);
    Collection<int32_t> ordinals(Collection<int32_t>::newInstance(reader->maxDoc()));
    Collection<String> lookup(Collection<String>::newInstance(reader->maxDoc() + 1));
    TermDocsPtr docs(reader->termDocs());
    TermEnumPtr terms(reader->terms(newLucene<Term>(field)));

    // Ordinal 0 is an empty-string entry for documents that have no terms in
    // this field. Should a document with no terms be at top or bottom? This puts
    // them at the top - if it is changed, FieldDocSortedHitQueue needs to change
    // as well.
    lookup[0] = L"";
    int32_t termCount = 1; // next free ordinal == number of entries used so far

    // Holds a LuceneException caught during the scan so that both enumerations
    // can be closed before it is passed on through throwException().
    LuceneException deferred;
    try {
        while (true) {
            TermPtr current(terms->term());
            // stop when there is no current term, the field changes, or the
            // lookup array has no room left
            if (!current || current->field() != field || termCount >= lookup.size()) {
                break;
            }

            // store term text
            lookup[termCount] = current->text();

            docs->seek(terms);
            while (docs->next()) {
                ordinals[docs->doc()] = termCount;
            }

            ++termCount;
            if (!terms->next()) {
                break;
            }
        }
    } catch (LuceneException& e) {
        deferred = e;
    }

    docs->close();
    terms->close();
    deferred.throwException();

    if (termCount == 0) {
        // if there are no terms, make the term array have a single null entry
        // (termCount starts at 1 above and only grows, so this branch is purely defensive)
        lookup = Collection<String>::newInstance(1);
    } else if (termCount < lookup.size()) {
        // if there are less terms than documents, trim off the dead array space
        lookup.resize(termCount);
    }

    return newLucene<StringIndex>(ordinals, lookup);
}
// Builds a Collection<double> with one slot per document for the field named by
// key->field, parsing each term's text with the parser stored in key->custom.
// When no parser is stored, the work is delegated to the wrapping FieldCache.
boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(entry->custom));

    if (!parser) {
        // No parser given: try the default double parser first and, if that
        // raises NumberFormatException, retry with the NumericUtils parser.
        FieldCachePtr wrapper(_wrapper);
        try {
            return wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER());
        } catch (NumberFormatException&) {
            return wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
        }
    }

    Collection<double> values;
    TermDocsPtr docs(reader->termDocs());
    TermEnumPtr terms(reader->terms(newLucene<Term>(field)));

    // Holds a LuceneException caught during the scan so that both enumerations
    // can be closed before it is passed on through throwException().
    LuceneException deferred;
    try {
        for (bool more = true; more; more = terms->next()) {
            TermPtr current(terms->term());
            // stop when there is no current term or it belongs to another field
            if (!current || current->field() != field) {
                break;
            }
            double parsed = parser->parseDouble(current->text());
            if (!values) {
                // late init: allocate only after the first term parsed successfully
                values = Collection<double>::newInstance(reader->maxDoc());
            }
            docs->seek(terms);
            while (docs->next()) {
                values[docs->doc()] = parsed;
            }
        }
    } catch (StopFillCacheException&) {
        // ends the scan without recording an error; values filled so far are kept
    } catch (LuceneException& e) {
        deferred = e;
    }

    docs->close();
    terms->close();
    deferred.throwException();

    if (!values) {
        // no values were stored: still return an array sized for every document
        values = Collection<double>::newInstance(reader->maxDoc());
    }
    return values;
}
// Splits the search term's text at its first wildcard (WILDCARD_STRING or
// WILDCARD_CHAR): the part before it becomes `pre`, the rest becomes `text`.
// The enumeration is then started at a term made of the search field and `pre`.
WildcardTermEnum::WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term) {
    _endEnum = false;
    searchTerm = term;
    field = searchTerm->field();

    String pattern(searchTerm->text());

    // find() yields String::npos when nothing is found. This relies on npos being
    // the largest size_type value (as it is for std::basic_string), so the smaller
    // of the two results is the first wildcard position, or npos if there is none.
    String::size_type firstWildcard = std::min(pattern.find(WILDCARD_STRING), pattern.find(WILDCARD_CHAR));

    if (firstWildcard != String::npos) {
        pre = pattern.substr(0, firstWildcard);
    } else {
        // no wildcard at all: empty prefix, the whole text is the pattern
        pre = L"";
    }

    preLen = pre.length();
    text = pattern.substr(preLen);

    setEnum(reader->terms(newLucene<Term>(searchTerm->field(), pre)));
}
// Stores the range bounds, inclusiveness flags, field and collator, then starts
// the underlying enumeration. With a collator the start term has empty text;
// without one it uses the lower bound's text.
TermRangeTermEnum::TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, StringValue upperTermText, bool includeLower, bool includeUpper, CollatorPtr collator) {
    this->field = field;
    this->collator = collator;
    this->lowerTermText = lowerTermText;
    this->upperTermText = upperTermText;
    this->_endEnum = false;

    // do a little bit of normalization: open ended range queries should always be inclusive.
    this->includeLower = includeLower || VariantUtils::isNull(this->lowerTermText);
    this->includeUpper = includeUpper || VariantUtils::isNull(this->upperTermText);

    // Start at the empty text when a collator is set, otherwise at the lower bound.
    String startTermText;
    if (!collator) {
        startTermText = VariantUtils::get<String>(this->lowerTermText);
    }

    setEnum(reader->terms(newLucene<Term>(this->field, startTermText)));
}
// Builds a Collection<uint8_t> with reader->maxDoc() slots for the field named
// by key->field, parsing each term's text with the parser stored in key->custom.
// When no parser is stored, the work is delegated to the wrapping FieldCache
// with the default byte parser.
boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key) {
    EntryPtr entry(key);
    String field(entry->field);
    ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(entry->custom));

    if (!parser) {
        FieldCachePtr wrapper(_wrapper);
        return wrapper->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER());
    }

    Collection<uint8_t> values(Collection<uint8_t>::newInstance(reader->maxDoc()));
    TermDocsPtr docs(reader->termDocs());
    TermEnumPtr terms(reader->terms(newLucene<Term>(field)));

    // Holds a LuceneException caught during the scan so that both enumerations
    // can be closed before it is passed on through throwException().
    LuceneException deferred;
    try {
        for (bool more = true; more; more = terms->next()) {
            TermPtr current(terms->term());
            // stop when there is no current term or it belongs to another field
            if (!current || current->field() != field) {
                break;
            }
            uint8_t parsed = parser->parseByte(current->text());
            docs->seek(terms);
            while (docs->next()) {
                values[docs->doc()] = parsed;
            }
        }
    } catch (StopFillCacheException&) {
        // ends the scan without recording an error; values filled so far are kept
    } catch (LuceneException& e) {
        deferred = e;
    }

    docs->close();
    terms->close();
    deferred.throwException();
    return values;
}