Ejemplo n.º 1
0
 boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     EntryPtr entry(key);
     String field(entry->field);
     Collection<String> retArray(Collection<String>::newInstance(reader->maxDoc()));
     TermDocsPtr termDocs(reader->termDocs());
     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
     LuceneException finally;
     try
     {
         do
         {
             TermPtr term(termEnum->term());
             if (!term || term->field() != field)
                 break;
             String termval(term->text());
             termDocs->seek(termEnum);
             while (termDocs->next())
                 retArray[termDocs->doc()] = termval;
         }
         while (termEnum->next());
     }
     catch (LuceneException& e)
     {
         finally = e;
     }
     termDocs->close();
     termEnum->close();
     finally.throwException();
     return retArray;
 }
Ejemplo n.º 2
0
 boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     EntryPtr entry(key);
     String field(entry->field);
     Collection<int32_t> retArray(Collection<int32_t>::newInstance(reader->maxDoc()));
     Collection<String> mterms(Collection<String>::newInstance(reader->maxDoc() + 1));
     TermDocsPtr termDocs(reader->termDocs());
     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
     int32_t t = 0; // current term number
     
     // an entry for documents that have no terms in this field should a document with no terms be at 
     // top or bottom?  This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to 
     // change as well.
     mterms[t++] = L"";
     
     LuceneException finally;
     try
     {
         do
         {
             TermPtr term(termEnum->term());
             if (!term || term->field() != field || t >= mterms.size() )
                 break;
             
             // store term text
             mterms[t] = term->text();
             
             termDocs->seek(termEnum);
             while (termDocs->next())
                 retArray[termDocs->doc()] = t;
             
             ++t;
         }
         while (termEnum->next());
     }
     catch (LuceneException& e)
     {
         finally = e;
     }
     termDocs->close();
     termEnum->close();
     finally.throwException();
     
     if (t == 0)
     {
         // if there are no terms, make the term array have a single null entry
         mterms = Collection<String>::newInstance(1);
     }
     else if (t < mterms.size())
     {
         // if there are less terms than documents, trim off the dead array space
         mterms.resize(t);
     }
     
     return newLucene<StringIndex>(retArray, mterms);
 }
static int32_t count(const TermPtr& t, const IndexReaderPtr& r) {
    int32_t count = 0;
    TermDocsPtr td = r->termDocs(t);
    while (td->next()) {
        td->doc();
        ++count;
    }
    td->close();
    return count;
}
static void verifyTermDocs(DirectoryPtr dir, TermPtr term, int32_t numDocs)
{
    IndexReaderPtr reader = IndexReader::open(dir, true);
    TermDocsPtr termDocs = reader->termDocs(term);
    int32_t count = 0;
    while (termDocs->next())
        ++count;
    BOOST_CHECK_EQUAL(count, numDocs);
    reader->close();
}
Ejemplo n.º 5
0
 boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     EntryPtr entry(key);
     String field(entry->field);
     DoubleParserPtr parser(VariantUtils::get<DoubleParserPtr>(entry->custom));
     if (!parser)
     {
         FieldCachePtr wrapper(_wrapper);
         boost::any doubles;
         try
         {
             doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER());
         }
         catch (NumberFormatException&)
         {
             doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER());
         }
         return doubles;
     }
     Collection<double> retArray;
     TermDocsPtr termDocs(reader->termDocs());
     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
     LuceneException finally;
     try
     {
         do
         {
             TermPtr term(termEnum->term());
             if (!term || term->field() != field)
                 break;
             double termval = parser->parseDouble(term->text());
             if (!retArray) // late init
                 retArray = Collection<double>::newInstance(reader->maxDoc());
             termDocs->seek(termEnum);
             while (termDocs->next())
                 retArray[termDocs->doc()] = termval;
         }
         while (termEnum->next());
     }
     catch (StopFillCacheException&)
     {
     }
     catch (LuceneException& e)
     {
         finally = e;
     }
     termDocs->close();
     termEnum->close();
     finally.throwException();
     if (!retArray) // no values
         retArray = Collection<double>::newInstance(reader->maxDoc());
     return retArray;
 }
Ejemplo n.º 6
0
 boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key)
 {
     EntryPtr entry(key);
     String field(entry->field);
     ByteParserPtr parser(VariantUtils::get<ByteParserPtr>(entry->custom));
     if (!parser)
         return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER());
     Collection<uint8_t> retArray(Collection<uint8_t>::newInstance(reader->maxDoc()));
     TermDocsPtr termDocs(reader->termDocs());
     TermEnumPtr termEnum(reader->terms(newLucene<Term>(field)));
     LuceneException finally;
     try
     {
         do
         {
             TermPtr term(termEnum->term());
             if (!term || term->field() != field)
                 break;
             uint8_t termval = parser->parseByte(term->text());
             termDocs->seek(termEnum);
             while (termDocs->next())
                 retArray[termDocs->doc()] = termval;
         }
         while (termEnum->next());
     }
     catch (StopFillCacheException&)
     {
     }
     catch (LuceneException& e)
     {
         finally = e;
     }
     termDocs->close();
     termEnum->close();
     finally.throwException();
     return retArray;
 }
    void checkSkipTo(int32_t indexDivisor) {
        DirectoryPtr dir = newLucene<RAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        TermPtr ta = newLucene<Term>(L"content", L"aaa");
        for (int32_t i = 0; i < 10; ++i) {
            addDoc(writer, L"aaa aaa aaa aaa");
        }

        TermPtr tb = newLucene<Term>(L"content", L"bbb");
        for (int32_t i = 0; i < 16; ++i) {
            addDoc(writer, L"bbb bbb bbb bbb");
        }

        TermPtr tc = newLucene<Term>(L"content", L"ccc");
        for (int32_t i = 0; i < 50; ++i) {
            addDoc(writer, L"ccc ccc ccc ccc");
        }

        // assure that we deal with a single segment
        writer->optimize();
        writer->close();

        IndexReaderPtr reader = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, indexDivisor);

        TermDocsPtr tdocs = reader->termDocs();

        // without optimization (assumption skipInterval == 16)

        // with next
        tdocs->seek(ta);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(0, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(1, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(0));
        EXPECT_EQ(2, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(4));
        EXPECT_EQ(4, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(9));
        EXPECT_EQ(9, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(10));

        // without next
        tdocs->seek(ta);
        EXPECT_TRUE(tdocs->skipTo(0));
        EXPECT_EQ(0, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(4));
        EXPECT_EQ(4, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(9));
        EXPECT_EQ(9, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(10));

        // exactly skipInterval documents and therefore with optimization

        // with next
        tdocs->seek(tb);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(10, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(11, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(12, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(15));
        EXPECT_EQ(15, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(24));
        EXPECT_EQ(24, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(25));
        EXPECT_EQ(25, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(26));

        // without next
        tdocs->seek(tb);
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(10, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(15));
        EXPECT_EQ(15, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(24));
        EXPECT_EQ(24, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(25));
        EXPECT_EQ(25, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(26));

        // much more than skipInterval documents and therefore with optimization

        // with next
        tdocs->seek(tc);
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(26, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->next());
        EXPECT_EQ(27, tdocs->doc());
        EXPECT_EQ(4, tdocs->freq());
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(28, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(40));
        EXPECT_EQ(40, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(57));
        EXPECT_EQ(57, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(74));
        EXPECT_EQ(74, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(75));
        EXPECT_EQ(75, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(76));

        // without next
        tdocs->seek(tc);
        EXPECT_TRUE(tdocs->skipTo(5));
        EXPECT_EQ(26, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(40));
        EXPECT_EQ(40, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(57));
        EXPECT_EQ(57, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(74));
        EXPECT_EQ(74, tdocs->doc());
        EXPECT_TRUE(tdocs->skipTo(75));
        EXPECT_EQ(75, tdocs->doc());
        EXPECT_TRUE(!tdocs->skipTo(76));

        tdocs->close();
        reader->close();
        dir->close();
    }