// Opens a reader on `dir` and checks the first two enumerated terms twice:
// once through an unpositioned enumeration (reader->terms()) and once through
// an enumeration requested for the term ("content", "aaa"). In both passes the
// expected sequence is "aaa" with a document frequency of 200 followed by
// "bbb" with a document frequency of 100.
static void verifyDocFreq(const DirectoryPtr& dir) {
    IndexReaderPtr reader = IndexReader::open(dir, true);

    // create enumeration of all terms
    TermEnumPtr termEnum = reader->terms();
    // go to the first term (aaa)
    termEnum->next();
    // assert that term is 'aaa'
    EXPECT_EQ(L"aaa", termEnum->term()->text());
    EXPECT_EQ(200, termEnum->docFreq());
    // go to the second term (bbb)
    termEnum->next();
    // assert that term is 'bbb'
    EXPECT_EQ(L"bbb", termEnum->term()->text());
    EXPECT_EQ(100, termEnum->docFreq());

    termEnum->close();

    // create enumeration of terms after term 'aaa', including 'aaa'
    termEnum = reader->terms(newLucene<Term>(L"content", L"aaa"));
    // this enumeration is inspected without a prior next(): assert that term is 'aaa'
    EXPECT_EQ(L"aaa", termEnum->term()->text());
    EXPECT_EQ(200, termEnum->docFreq());
    // go to term 'bbb'
    termEnum->next();
    // assert that term is 'bbb'
    EXPECT_EQ(L"bbb", termEnum->term()->text());
    EXPECT_EQ(100, termEnum->docFreq());

    termEnum->close();

    // Close the reader opened at the top of this function so it is released
    // explicitly rather than left open when the helper returns.
    reader->close();
}
예제 #2
0
 // Stores the prefix term, clears the end-of-enumeration flag and installs the
 // reader's term enumeration requested for a fresh Term carrying the prefix's
 // field and text.
 PrefixTermEnum::PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix)
 {
     _endEnum = false;
     this->prefix = prefix; // the parameter shadows the member, hence the explicit this->

     // Build a separate Term with the same field/text instead of handing the
     // caller's term object to the reader.
     TermPtr startTerm(newLucene<Term>(prefix->field(), prefix->text()));
     TermEnumPtr source(reader->terms(startTerm));
     setEnum(source);
 }
예제 #3
0
 // Builds an array with reader->maxDoc() slots in which the slot of every
 // document reported by termDocs for a term of key->field holds that term's
 // text. The array is returned wrapped in a boost::any.
 //
 // A LuceneException raised while filling the array is held back until both
 // termDocs and termEnum have been closed, and is then rethrown.
 boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     String fieldName(key->field);
     Collection<String> values(Collection<String>::newInstance(reader->maxDoc()));
     TermDocsPtr docs(reader->termDocs());
     TermEnumPtr terms(reader->terms(newLucene<Term>(fieldName)));
     LuceneException pending; // stays empty unless the fill loop throws
     try
     {
         while (true)
         {
             TermPtr current(terms->term());
             // stop once the enumeration is exhausted or has left the requested field
             if (!current || current->field() != fieldName)
                 break;

             String text(current->text());
             docs->seek(terms);
             while (docs->next())
                 values[docs->doc()] = text;

             if (!terms->next())
                 break;
         }
     }
     catch (LuceneException& e)
     {
         pending = e;
     }
     // emulate a "finally" block: always close before reporting any failure
     docs->close();
     terms->close();
     pending.throwException();
     return values;
 }
예제 #4
0
 // Builds a StringIndex for entry->field from two parallel structures:
 //   * mterms   - the term texts in enumeration order, with slot 0 reserved for
 //                the empty string;
 //   * retArray - one slot per document (reader->maxDoc() entries) holding the
 //                mterms position of the term reported for that document.
 //
 // The term table has room for maxDoc() + 1 entries; enumeration stops quietly
 // once it is full. A LuceneException raised while filling is held back until
 // termDocs and termEnum have been closed, and is then rethrown.
 boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     EntryPtr entry(key);
     String field(entry->field);
     Collection<int32_t> retArray(Collection<int32_t>::newInstance(reader->maxDoc()));
     Collection<String> mterms(Collection<String>::newInstance(reader->maxDoc() + 1));
     TermDocsPtr termDocs(reader->termDocs());
     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
     int32_t t = 0; // current term number
     
     // Slot 0 is the entry for documents that have no terms in this field. Should such documents
     // sort at the top or at the bottom? This puts them at the top - if it is changed,
     // FieldDocSortedHitQueue needs to change as well.
     // NOTE(review): presumably retArray starts zero-filled so untouched documents point at this
     // slot - confirm against Collection::newInstance.
     mterms[t++] = L"";
     
     LuceneException finally;
     try
     {
         do
         {
             TermPtr term(termEnum->term());
             // stop when the enumeration is exhausted, has left the field, or the table is full
             if (!term || term->field() != field || t >= mterms.size() )
                 break;
             
             // store term text
             mterms[t] = term->text();
             
             termDocs->seek(termEnum);
             while (termDocs->next())
                 retArray[termDocs->doc()] = t;
             
             ++t;
         }
         while (termEnum->next());
     }
     catch (LuceneException& e)
     {
         finally = e;
     }
     termDocs->close();
     termEnum->close();
     finally.throwException();
     
     // t is at least 1 here because slot 0 was filled above, so no "no terms at all" case exists;
     // only the unused tail of the table may need to be dropped.
     if (t < mterms.size())
     {
         // if there are less terms than documents, trim off the dead array space
         mterms.resize(t);
     }
     
     return newLucene<StringIndex>(retArray, mterms);
 }
예제 #5
0
 // Produces the per-document double values for key->field.
 //
 // Without a custom parser in key->custom the request is forwarded to the
 // wrapping FieldCache: first with DEFAULT_DOUBLE_PARSER and, if that raises
 // NumberFormatException, again with NUMERIC_UTILS_DOUBLE_PARSER.
 //
 // With a parser, every term of the field is parsed and the value stored in the
 // slot of each document reported by termDocs. The result array has
 // reader->maxDoc() slots and is allocated lazily, at the latest just before
 // returning. A StopFillCacheException ends the fill without being treated as
 // an error; any other LuceneException is rethrown after termDocs and termEnum
 // have been closed.
 boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     String fieldName(key->field);
     DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(key->custom));
     if (!parser)
     {
         FieldCachePtr cache(_wrapper);
         try
         {
             return cache->getDoubles(reader, fieldName, FieldCache::DEFAULT_DOUBLE_PARSER());
         }
         catch (NumberFormatException&)
         {
             return cache->getDoubles(reader, fieldName, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
         }
     }

     Collection<double> values;
     TermDocsPtr docs(reader->termDocs());
     TermEnumPtr terms(reader->terms(newLucene<Term>(fieldName)));
     LuceneException pending; // stays empty unless the fill loop fails
     try
     {
         while (true)
         {
             TermPtr current(terms->term());
             // stop once the enumeration is exhausted or has left the requested field
             if (!current || current->field() != fieldName)
                 break;

             double parsed = parser->parseDouble(current->text());
             if (!values)
             {
                 // allocate only after the first term has parsed successfully
                 values = Collection<double>::newInstance(reader->maxDoc());
             }

             docs->seek(terms);
             while (docs->next())
                 values[docs->doc()] = parsed;

             if (!terms->next())
                 break;
         }
     }
     catch (StopFillCacheException&)
     {
         // the parser asked to stop filling; keep what has been collected so far
     }
     catch (LuceneException& e)
     {
         pending = e;
     }
     // emulate a "finally" block: always close before reporting any failure
     docs->close();
     terms->close();
     pending.throwException();

     if (!values)
     {
         // nothing was parsed: still hand back an array with one slot per document
         values = Collection<double>::newInstance(reader->maxDoc());
     }
     return values;
 }
예제 #6
0
// Splits the search term's text at its first wildcard (WILDCARD_STRING or
// WILDCARD_CHAR): the part before it becomes `pre`, the remainder becomes
// `text`. When the text contains no wildcard, `pre` is empty and `text` is the
// whole term text. The reader's term enumeration requested for (field, pre)
// is then installed via setEnum().
WildcardTermEnum::WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term) {
    _endEnum = false;
    searchTerm = term;
    field = searchTerm->field();
    const String pattern(searchTerm->text());

    // String::npos is the largest size_type value, so the minimum of the two
    // searches is the first wildcard position, or npos when neither occurs.
    const String::size_type starPos = pattern.find(WILDCARD_STRING);
    const String::size_type charPos = pattern.find(WILDCARD_CHAR);
    const String::size_type firstWildcard = std::min(starPos, charPos);

    if (firstWildcard == String::npos) {
        pre = L"";
    } else {
        pre = pattern.substr(0, firstWildcard);
    }

    preLen = pre.length();
    text = pattern.substr(preLen);

    TermPtr startTerm = newLucene<Term>(field, pre);
    setEnum(reader->terms(startTerm));
}
 // Records the range bounds, inclusiveness flags, field and collator, then
 // installs the reader's term enumeration requested for (field, start text).
 // The start text is the lower bound when no collator is supplied and the
 // empty string otherwise. A bound that is null is always treated as inclusive.
 TermRangeTermEnum::TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, 
                                      StringValue upperTermText, bool includeLower, bool includeUpper, CollatorPtr collator)
 {
     // every parameter shadows the member of the same name, hence the explicit this->
     this->_endEnum = false;
     this->collator = collator;
     this->field = field;
     this->lowerTermText = lowerTermText;
     this->upperTermText = upperTermText;

     // do a little bit of normalization: open ended range queries should always be inclusive.
     this->includeLower = includeLower || VariantUtils::isNull(this->lowerTermText);
     this->includeUpper = includeUpper || VariantUtils::isNull(this->upperTermText);

     String startTermText; // stays empty when a collator is in use
     if (!collator)
         startTermText = VariantUtils::get<String>(this->lowerTermText);

     TermPtr startTerm(newLucene<Term>(this->field, startTermText));
     setEnum(reader->terms(startTerm));
 }
예제 #8
0
 // Produces the per-document byte values for key->field.
 //
 // Without a custom parser in key->custom the request is forwarded to the
 // wrapping FieldCache with DEFAULT_BYTE_PARSER. Otherwise every term of the
 // field is parsed and the value stored in the slot of each document reported
 // by termDocs; the result array has reader->maxDoc() slots.
 //
 // A StopFillCacheException ends the fill without being treated as an error;
 // any other LuceneException is rethrown after termDocs and termEnum have been
 // closed.
 boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     String fieldName(key->field);
     ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(key->custom));
     if (!parser)
     {
         FieldCachePtr cache(_wrapper);
         return cache->getBytes(reader, fieldName, FieldCache::DEFAULT_BYTE_PARSER());
     }

     Collection<uint8_t> values(Collection<uint8_t>::newInstance(reader->maxDoc()));
     TermDocsPtr docs(reader->termDocs());
     TermEnumPtr terms(reader->terms(newLucene<Term>(fieldName)));
     LuceneException pending; // stays empty unless the fill loop fails
     try
     {
         while (true)
         {
             TermPtr current(terms->term());
             // stop once the enumeration is exhausted or has left the requested field
             if (!current || current->field() != fieldName)
                 break;

             uint8_t parsed = parser->parseByte(current->text());
             docs->seek(terms);
             while (docs->next())
                 values[docs->doc()] = parsed;

             if (!terms->next())
                 break;
         }
     }
     catch (StopFillCacheException&)
     {
         // the parser asked to stop filling; keep what has been collected so far
     }
     catch (LuceneException& e)
     {
         pending = e;
     }
     // emulate a "finally" block: always close before reporting any failure
     docs->close();
     terms->close();
     pending.throwException();
     return values;
 }