PrefixInBooleanQueryFixture()
 {
     directory = newLucene<RAMDirectory>();
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     
     for (int32_t i = 0; i < 5137; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     
     for (int32_t i = 5138; i < 11377; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"meaninglessnames", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, L"tangfulin", Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         writer->addDocument(doc);
     }
     
     writer->close();
 }
    ParallelTermEnumTestFixture()
    {
        RAMDirectoryPtr rd1 = newLucene<RAMDirectory>();
        IndexWriterPtr iw1 = newLucene<IndexWriter>(rd1, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"field1", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field2", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field4", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
        iw1->addDocument(doc);

        iw1->close();
        RAMDirectoryPtr rd2 = newLucene<RAMDirectory>();
        IndexWriterPtr iw2 = newLucene<IndexWriter>(rd2, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"field0", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field1", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field3", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
        iw2->addDocument(doc);

        iw2->close();

        this->ir1 = IndexReader::open(rd1, true);
        this->ir2 = IndexReader::open(rd2, true);
    }
    FieldCacheSanityCheckerTestFixture()
    {
        RAMDirectoryPtr dirA = newLucene<RAMDirectory>();
        RAMDirectoryPtr dirB = newLucene<RAMDirectory>();

        IndexWriterPtr wA = newLucene<IndexWriter>(dirA, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        IndexWriterPtr wB = newLucene<IndexWriter>(dirB, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        int64_t theLong = LLONG_MAX;
        double theDouble = DBL_MAX;
        uint8_t theByte = UCHAR_MAX;
        int32_t theInt = INT_MAX;
        for (int32_t i = 0; i < NUM_DOCS; ++i)
        {
            DocumentPtr doc = newLucene<Document>();
            doc->add(newLucene<Field>(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            if (i % 3 == 0)
                wA->addDocument(doc);
            else
                wB->addDocument(doc);
        }
        wA->close();
        wB->close();
        readerA = IndexReader::open(dirA, true);
        readerB = IndexReader::open(dirB, true);
        readerX = newLucene<MultiReader>(newCollection<IndexReaderPtr>(readerA, readerB));
    }
// Stress test: run two indexer threads and two searcher threads against a single index.
static void runTest(DirectoryPtr directory)
{
    Collection<TimedThreadPtr> threads(Collection<TimedThreadPtr>::newInstance(4));
    AnalyzerPtr analyzer = newLucene<SimpleAnalyzer>();

    IndexWriterPtr writer = newLucene<MockIndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
    writer->setMaxBufferedDocs(7);
    writer->setMergeFactor(3);

    // Seed the index with 100 documents, committing before the add whenever (docNum - 1) is a multiple of 7.
    int32_t docNum = 0;
    while (docNum < 100)
    {
        DocumentPtr seed = newLucene<Document>();
        seed->add(newLucene<Field>(L"id", StringUtils::toString(docNum), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        seed->add(newLucene<Field>(L"contents", intToEnglish(docNum), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((docNum - 1) % 7 == 0)
            writer->commit();
        writer->addDocument(seed);
        ++docNum;
    }
    writer->commit();

    // Sanity check: the committed base index must contain all 100 documents.
    IndexReaderPtr baseReader = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, baseReader->numDocs());
    baseReader->close();

    // Two indexers share the writer ...
    IndexerThreadPtr firstIndexer = newLucene<IndexerThread>(writer);
    threads[0] = firstIndexer;
    firstIndexer->start();

    IndexerThreadPtr secondIndexer = newLucene<IndexerThread>(writer);
    threads[1] = secondIndexer;
    secondIndexer->start();

    // ... while two searchers are given the directory.
    SearcherThreadPtr firstSearcher = newLucene<SearcherThread>(directory);
    threads[2] = firstSearcher;
    firstSearcher->start();

    SearcherThreadPtr secondSearcher = newLucene<SearcherThread>(directory);
    threads[3] = secondSearcher;
    secondSearcher->start();

    // Wait for all four workers before closing the writer.
    firstIndexer->join();
    secondIndexer->join();
    firstSearcher->join();
    secondSearcher->join();

    writer->close();

    BOOST_CHECK(!firstIndexer->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!secondIndexer->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!firstSearcher->failed); // hit unexpected exception in search1
    BOOST_CHECK(!secondSearcher->failed); // hit unexpected exception in search2
}
 virtual void doWork()
 {
     // Update all 100 docs
     for (int32_t i = 0; i < 100; ++i)
     {
         DocumentPtr d = newLucene<Document>();
         d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
         writer->updateDocument(newLucene<Term>(L"id", StringUtils::toString(i)), d);
     }
 }
TEST_F(IndexWriterReaderTest, testUpdateDocument) {
    // Checks how updateDocument() and subsequent writer activity affect isCurrent()
    // on readers obtained from the writer and from the directory.
    const bool optimize = true;

    DirectoryPtr dir1 = newLucene<MockRAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);

    // Build the initial index, leaving the writer open.
    createIndexNoClose(!optimize, L"index1", writer);

    // A reader taken straight from the writer starts out current.
    IndexReaderPtr r1 = writer->getReader();
    EXPECT_TRUE(r1->isCurrent());

    // Remember the id of document 10, then re-index that document under a new id.
    String id10 = r1->document(10)->getField(L"id")->stringValue();
    String replacementId = StringUtils::toString(8000);

    DocumentPtr newDoc = r1->document(10);
    newDoc->removeField(L"id");
    newDoc->add(newLucene<Field>(L"id", replacementId, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    writer->updateDocument(newLucene<Term>(L"id", id10), newDoc);
    EXPECT_TRUE(!r1->isCurrent());

    // A fresh reader from the writer sees the update: old id gone, new id present once.
    IndexReaderPtr r2 = writer->getReader();
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r2));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", replacementId), r2));

    r1->close();
    writer->close();
    EXPECT_TRUE(r2->isCurrent());

    // A reader opened on the directory after the close agrees with r2.
    IndexReaderPtr r3 = IndexReader::open(dir1, true);
    EXPECT_TRUE(r3->isCurrent());
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_EQ(0, count(newLucene<Term>(L"id", id10), r3));
    EXPECT_EQ(1, count(newLucene<Term>(L"id", replacementId), r3));

    // Adding a document through a new writer leaves both readers current until that writer is closed.
    writer = newLucene<IndexWriter>(dir1, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr extraDoc = newLucene<Document>();
    extraDoc->add(newLucene<Field>(L"field", L"a b c", Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(extraDoc);
    EXPECT_TRUE(r2->isCurrent());
    EXPECT_TRUE(r3->isCurrent());

    writer->close();

    EXPECT_TRUE(!r2->isCurrent());
    EXPECT_TRUE(!r3->isCurrent());

    r2->close();
    r3->close();

    dir1->close();
}
    /// For NUM_ITER rounds: add 200 documents, let NUM_THREADS OptimizeThread workers share the
    /// writer until they finish, then verify the document counts and reopen the writer.
    /// @param directory index location used for the writer and the verification reader
    /// @param merger optional merge scheduler; installed on the initial writer only when non-null
    void runTest(DirectoryPtr directory, MergeSchedulerPtr merger)
    {
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, analyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);
        writer->setMaxBufferedDocs(2);
        if (merger)
            writer->setMergeScheduler(merger);

        for (int32_t iter = 0; iter < NUM_ITER; ++iter)
        {
            // Bulk-add with a merge factor of 1000, then switch to 4 before the threads run.
            writer->setMergeFactor(1000);

            int32_t docNum = 0;
            while (docNum < 200)
            {
                DocumentPtr seed = newLucene<Document>();
                seed->add(newLucene<Field>(L"id", StringUtils::toString(docNum), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                seed->add(newLucene<Field>(L"contents", intToEnglish(docNum), Field::STORE_NO, Field::INDEX_ANALYZED));
                writer->addDocument(seed);
                ++docNum;
            }

            writer->setMergeFactor(4);

            // Every worker receives the current iteration, its own index, and the shared writer (twice).
            Collection<LuceneThreadPtr> workers = Collection<LuceneThreadPtr>::newInstance(NUM_THREADS);
            for (int32_t t = 0; t < NUM_THREADS; ++t)
                workers[t] = newLucene<OptimizeThread>(NUM_ITER2, iter, t, writer, writer);

            for (int32_t t = 0; t < NUM_THREADS; ++t)
                workers[t]->start();
            for (int32_t t = 0; t < NUM_THREADS; ++t)
                workers[t]->join();

            // threadFactor is NUM_THREADS * (NUM_THREADS + 1) / 2, computed in floating point and truncated.
            int32_t threadFactor = static_cast<int32_t>((static_cast<double>(NUM_THREADS) / 2.0) * static_cast<double>(1 + NUM_THREADS));
            int32_t expectedDocCount = static_cast<int32_t>((1 + iter) * (200 + 8 * NUM_ITER2 * threadFactor));

            BOOST_CHECK_EQUAL(expectedDocCount, writer->maxDoc());

            // Reopen the writer in append mode for the next round.
            writer->close();
            writer = newLucene<IndexWriter>(directory, analyzer, false, IndexWriter::MaxFieldLengthUNLIMITED);
            writer->setMaxBufferedDocs(2);

            IndexReaderPtr verifier = IndexReader::open(directory, true);
            BOOST_CHECK(verifier->isOptimized());
            BOOST_CHECK_EQUAL(expectedDocCount, verifier->numDocs());
            verifier->close();
        }
        writer->close();
    }
/// Builds a test document for document number n.
/// Fields: "id" (n), "indexname" (indexName), "field1" ("a<n>") and, for numFields > 1,
/// "field2".."field<numFields>", each holding "a<n> b<n>".
static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) {
    DocumentPtr result = newLucene<Document>();
    result->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    result->add(newLucene<Field>(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));

    // field1 only carries the "a<n>" token.
    StringStream text;
    text << L"a" << n;
    result->add(newLucene<Field>(L"field1", text.str(), Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));

    // The remaining fields all share the longer "a<n> b<n>" text.
    text << L" b" << n;
    const String sharedText = text.str();
    int32_t i = 1;
    while (i < numFields) {
        result->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), sharedText, Field::STORE_YES, Field::INDEX_ANALYZED, Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS));
        ++i;
    }
    return result;
}
 /// Builds a test document for document number n.
 /// "field1", "fielda" and "fieldb" hold "a<n>" with different index options; for numFields > 1,
 /// "field2".."field<numFields>" hold "a<n> b<n>".
 DocumentPtr createDocument(int32_t n, int32_t numFields) {
     DocumentPtr result = newLucene<Document>();

     StringStream text;
     text << L"a" << n;

     // Same short text three times: analyzed, not analyzed without norms, and stored-only.
     result->add(newLucene<Field>(L"field1", text.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
     result->add(newLucene<Field>(L"fielda", text.str(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
     result->add(newLucene<Field>(L"fieldb", text.str(), Field::STORE_YES, Field::INDEX_NO));

     // Extend the text once; every numbered field from field2 onward reuses it.
     text << L" b" << n;
     int32_t i = 1;
     while (i < numFields) {
         result->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), text.str(), Field::STORE_YES, Field::INDEX_ANALYZED));
         ++i;
     }
     return result;
 }
Example #10
0
    /// Creates a document holding the given text and a timestamp.
    /// @param text stored, analyzed content for TEXT_FIELD
    /// @param time value converted via DateTools::timeToString at second resolution for DATE_TIME_FIELD
    /// @return the newly built document
    DocumentPtr createDocument(const String& text, int64_t time) {
        DocumentPtr result = newLucene<Document>();

        // Text field: stored and analyzed.
        result->add(newLucene<Field>(TEXT_FIELD, text, Field::STORE_YES, Field::INDEX_ANALYZED));

        // Date/time field: stored, indexed as a single un-analyzed term.
        String timestamp = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND);
        result->add(newLucene<Field>(DATE_TIME_FIELD, timestamp, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));

        return result;
    }
    /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it.
    void checkAgainstRAMDirectory() {
        StringStream fooField;
        StringStream termField;

        // add up to 250 terms to field "foo"
        int32_t fieldCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < fieldCount; ++i) {
            fooField << L" " << randomTerm();
        }

        // add up to 250 terms to field "foo"
        int32_t termCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < termCount; ++i) {
            termField << L" " << randomTerm();
        }

        RAMDirectoryPtr ramdir = newLucene<RAMDirectory>();
        AnalyzerPtr analyzer = randomAnalyzer();
        IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
        DocumentPtr doc = newLucene<Document>();
        FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        doc->add(field1);
        doc->add(field2);
        writer->addDocument(doc);
        writer->close();

        MemoryIndexPtr memory = newLucene<MemoryIndex>();
        memory->addField(L"foo", fooField.str(), analyzer);
        memory->addField(L"term", termField.str(), analyzer);
        checkAllQueries(memory, ramdir, analyzer);
    }
    /// Builds a single-segment, 500-document index and stores a searcher over it in `searcher`.
    /// Roughly every (500 / numHits)-th document contains both term1 and term2; of the rest,
    /// every 15th contains only term1 and all others contain term3 and term2.
    /// @param numHits approximate number of documents that should match "term1 term2".
    ///        A value of 0 produces no matching documents, and a magnitude above 500
    ///        makes every document match, instead of dividing or taking a modulo by zero.
    void createIndex(int32_t numHits) {
        int32_t numDocs = 500;

        // Spacing between matching documents. The original expression `i % (numDocs / numHits)`
        // is undefined when numHits is 0 or when the quotient truncates to 0, so both are guarded.
        int32_t hitInterval = (numHits != 0) ? numDocs / numHits : 0;
        if (hitInterval == 0 && numHits != 0) {
            hitInterval = 1;
        }

        DirectoryPtr directory = newLucene<SeekCountingDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        writer->setUseCompoundFile(false);
        writer->setMaxBufferedDocs(10);
        for (int32_t i = 0; i < numDocs; ++i) {
            DocumentPtr doc = newLucene<Document>();
            String content;
            if (hitInterval != 0 && i % hitInterval == 0) {
                // add a document that matches the query "term1 term2"
                content = term1 + L" " + term2;
            } else if (i % 15 == 0) {
                // add a document that only contains term1
                content = term1 + L" " + term1;
            } else {
                // add a document that contains term2 but not term1
                content = term3 + L" " + term2;
            }

            doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
            writer->addDocument(doc);
        }

        // make sure the index has only a single segment
        writer->optimize();
        writer->close();

        SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
        searcher = newLucene<IndexSearcher>(reader);
    }
TEST_F(LazyProxSkippingTest, testSeek) {
    // Index ten identical documents whose only field contains "a b".
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    int32_t added = 0;
    while (added < 10) {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        indexWriter->addDocument(document);
        ++added;
    }
    indexWriter->close();

    IndexReaderPtr indexReader = IndexReader::open(dir, true);
    TermPositionsPtr positions = indexReader->termPositions();

    // Seek to "b" first: expected in every document at position 1.
    positions->seek(newLucene<Term>(field, L"b"));
    for (int32_t docId = 0; docId != 10; ++docId) {
        positions->next();
        EXPECT_EQ(positions->doc(), docId);
        EXPECT_EQ(positions->nextPosition(), 1);
    }

    // Then seek to "a" on the same enumerator: expected in every document at position 0.
    positions->seek(newLucene<Term>(field, L"a"));
    for (int32_t docId = 0; docId != 10; ++docId) {
        positions->next();
        EXPECT_EQ(positions->doc(), docId);
        EXPECT_EQ(positions->nextPosition(), 0);
    }
}
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";

    // Index four documents whose category values are "1".."4".
    RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
    Collection<String> categories = newCollection<String>(L"1", L"2", L"3", L"4");

    IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t idx = 0; idx < categories.size(); ++idx) {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(FIELD, categories[idx], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        indexWriter->addDocument(document);
    }
    indexWriter->close();

    // Inner query: category is "1" OR "2".
    BooleanQueryPtr eitherOneOrTwo = newLucene<BooleanQuery>();
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    eitherOneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    // Outer query: the inner query MUST match and category "9" (not indexed above) MUST NOT.
    BooleanQueryPtr outer = newLucene<BooleanQuery>();
    outer->add(eitherOneOrTwo, BooleanClause::MUST);
    outer->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
    Collection<ScoreDocPtr> results = searcher->search(outer, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, results.size());
}
    /// One-time setup to initialise static members
    void setup() {
        // set the theoretical maximum term count for 8bit (see docs for the number)
        BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255);

        directory = newLucene<RAMDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthUNLIMITED);

        NumericFieldPtr field8 = newLucene<NumericField>(L"field8", 8, Field::STORE_YES, true);
        NumericFieldPtr field4 = newLucene<NumericField>(L"field4", 4, Field::STORE_YES, true);
        NumericFieldPtr field2 = newLucene<NumericField>(L"field2", 2, Field::STORE_YES, true);
        NumericFieldPtr fieldNoTrie = newLucene<NumericField>(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true);
        NumericFieldPtr ascfield8 = newLucene<NumericField>(L"ascfield8", 8, Field::STORE_NO, true);
        NumericFieldPtr ascfield4 = newLucene<NumericField>(L"ascfield4", 4, Field::STORE_NO, true);
        NumericFieldPtr ascfield2 = newLucene<NumericField>(L"ascfield2", 2, Field::STORE_NO, true);

        DocumentPtr doc = newLucene<Document>();

        // add fields, that have a distance to test general functionality
        doc->add(field8);
        doc->add(field4);
        doc->add(field2);
        doc->add(fieldNoTrie);

        // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
        doc->add(ascfield8);
        doc->add(ascfield4);
        doc->add(ascfield2);

        // Add a series of noDocs docs with increasing int values
        for (int32_t l = 0; l < noDocs; ++l) {
            int32_t val = distance * l + startOffset;
            field8->setIntValue(val);
            field4->setIntValue(val);
            field2->setIntValue(val);
            fieldNoTrie->setIntValue(val);

            val = l - (noDocs / 2);
            ascfield8->setIntValue(val);
            ascfield4->setIntValue(val);
            ascfield2->setIntValue(val);
            writer->addDocument(doc);
        }

        writer->optimize();
        writer->close();
        searcher = newLucene<IndexSearcher>(directory, true);
    }
 /// Wraps docText in a document with a single analyzed, unstored field "f" that omits norms.
 DocumentPtr makeDocument(const String& docText)
 {
     FieldPtr textField = newLucene<Field>(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED);
     textField->setOmitNorms(true);

     DocumentPtr result = newLucene<Document>();
     result->add(textField);
     return result;
 }
 /// Opens a writer on ramDir1, adds one document whose "body" field is s, and closes the writer.
 /// @param ramDir1 directory to write into
 /// @param s text for the stored, analyzed "body" field
 /// @param create passed to the IndexWriter constructor as its create flag
 void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create)
 {
     DocumentPtr document = newLucene<Document>();
     document->add(newLucene<Field>(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED));

     IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED);
     indexWriter->addDocument(document);
     indexWriter->close();
 }
 ExplanationsFixture::ExplanationsFixture()
 {
     // Query parser over FIELD, plus the four document texts that make up the test corpus.
     qp = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, FIELD, newLucene<WhitespaceAnalyzer>());
     docFields = newCollection<String>(L"w1 w2 w3 w4 w5", L"w1 w3 w2 w3 zz", L"w1 xx w2 yy w3", L"w1 w3 xx w2 yy w3 zz");

     RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
     IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

     // One document per text; KEY holds the text's position in docFields.
     int32_t idx = 0;
     while (idx < docFields.size())
     {
         DocumentPtr document = newLucene<Document>();
         document->add(newLucene<Field>(KEY, StringUtils::toString(idx), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
         document->add(newLucene<Field>(FIELD, docFields[idx], Field::STORE_NO, Field::INDEX_ANALYZED));
         indexWriter->addDocument(document);
         ++idx;
     }
     indexWriter->close();

     searcher = newLucene<IndexSearcher>(ramDir, true);
 }
/// Adds numDocs documents to writer, each with a single unstored, analyzed "content" field of "bbb".
static void addDocs2(IndexWriterPtr writer, int32_t numDocs)
{
    int32_t added = 0;
    while (added < numDocs)
    {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(document);
        ++added;
    }
}
 /// Creates a document with ten fields "f0".."f9" (values "v0".."v9"), all given the
 /// same boost obtained from a single nextNorm() call.
 DocumentPtr newDoc() {
     DocumentPtr result = newLucene<Document>();
     double sharedBoost = nextNorm();

     int32_t fieldNo = 0;
     while (fieldNo < 10) {
         FieldPtr field = newLucene<Field>(L"f" + StringUtils::toString(fieldNo), L"v" + StringUtils::toString(fieldNo), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
         field->setBoost(sharedBoost);
         result->add(field);
         ++fieldNo;
     }
     return result;
 }
Example #21
0
 virtual void run() {
     try {
         for (int32_t j = 0; j < numDocs; ++j) {
             DocumentPtr d = newLucene<Document>();
             d->add(newLucene<Field>(L"test", newLucene<PoolingPayloadTokenStream>(pool)));
             writer->addDocument(d);
         }
     } catch (LuceneException& e) {
         FAIL() << "Unexpected exception: " << e.getError();
     }
 }
Example #22
0
/// Verifies that the storePayloads flag in the FieldInfo is enabled for exactly those fields
/// that received payloads, both after a plain flush and after a merge.
TEST_F(PayloadsTest, testPayloadFieldBit) {
    DirectoryPtr ram = newLucene<RAMDirectory>();
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    DocumentPtr firstDoc = newLucene<Document>();
    // f1 never receives payloads
    firstDoc->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    // f2 has payloads in every document, but not for every term position; it checks that the
    // payload bit is enabled even when only some positions carry payloads
    firstDoc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    firstDoc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    // f3 has payloads in only some documents; it checks that the merge enables payloads for it
    firstDoc->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // Payload bytes: the eight characters of "somedata".
    const uint8_t rawPayload[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' };
    ByteArray someData(ByteArray::newInstance(8));
    std::memcpy(someData.get(), rawPayload, sizeof(rawPayload));

    // first pass: payload data for field f2 only
    analyzer->setPayloadData(L"f2", 1, someData, 0, 1);

    writer->addDocument(firstDoc);
    // closing flushes the document
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram);
    FieldInfosPtr infos = reader->fieldInfos();
    EXPECT_TRUE(!infos->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(!infos->fieldInfo(L"f3")->storePayloads);
    reader->close();

    // second pass: add another document that has payloads for f3 as well, then verify the
    // flag is now enabled for that field
    writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr secondDoc = newLucene<Document>();
    secondDoc->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    secondDoc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    secondDoc->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    secondDoc->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    // payload data for both f2 and f3
    analyzer->setPayloadData(L"f2", someData, 0, 1);
    analyzer->setPayloadData(L"f3", someData, 0, 3);
    writer->addDocument(secondDoc);
    // force a merge, then flush by closing
    writer->optimize();
    writer->close();

    reader = SegmentReader::getOnlySegmentReader(ram);
    infos = reader->fieldInfos();
    EXPECT_TRUE(!infos->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(infos->fieldInfo(L"f3")->storePayloads);
    reader->close();
}
/// Creates a new index in dir containing numDocs documents whose stored "count" field
/// holds the consecutive values start, start + 1, ..., start + numDocs - 1.
static void fillIndex(DirectoryPtr dir, int32_t start, int32_t numDocs)
{
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(dir, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    indexWriter->setMergeFactor(2);
    indexWriter->setMaxBufferedDocs(2);

    const int32_t end = start + numDocs;
    int32_t value = start;
    while (value < end)
    {
        DocumentPtr document = newLucene<Document>();
        document->add(newLucene<Field>(L"count", StringUtils::toString(value), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        indexWriter->addDocument(document);
        ++value;
    }
    indexWriter->close();
}
 TermScorerFixture()
 {
     values = newCollection<String>(L"all", L"dogs dogs", L"like", L"playing", L"fetch", L"all");
     directory = newLucene<RAMDirectory>();
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     for (int32_t i = 0; i < values.size(); ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         doc->add(newLucene<Field>(FIELD, values[i], Field::STORE_YES, Field::INDEX_ANALYZED));
         writer->addDocument(doc);
     }
     writer->close();
     indexSearcher = newLucene<IndexSearcher>(directory, false);
     indexReader = indexSearcher->getIndexReader();
 }
 virtual void run()
 {
     try
     {
         for (int32_t j = 0; j < numIter; ++j)
         {
             writerFinal->optimize(false);
             for (int32_t k = 0; k < 17 * (1 + iFinal); ++k)
             {
                 DocumentPtr d = newLucene<Document>();
                 d->add(newLucene<Field>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                 d->add(newLucene<Field>(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED));
                 writer->addDocument(d);
             }
             for (int32_t k = 0; k < 9 * (1 + iFinal); ++k)
                 writerFinal->deleteDocuments(newLucene<Term>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k)));
             writerFinal->optimize();
         }
     }
     catch (LuceneException& e)
     {
         BOOST_FAIL("Unexpected exception: " << e.getError());
     }
 }
 MultiThreadTermVectorsFixture()
 {
     directory = newLucene<RAMDirectory>();
     numDocs = 100;
     numThreads = 3;
     
     IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
     for (int32_t i = 0; i < numDocs; ++i)
     {
         DocumentPtr doc = newLucene<Document>();
         FieldablePtr fld = newLucene<Field>(L"field", intToEnglish(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED, Field::TERM_VECTOR_YES);
         doc->add(fld);
         writer->addDocument(doc);
     }
     writer->close();
 }
Example #27
0
/// Adds one document for the given index data to the Lucene writer.
/// A non-empty track name yields a track document (fulltext, track, artist, artistid, trackid);
/// otherwise a non-empty album name yields an album document (album, albumid); if both are
/// empty nothing is written. On a LuceneException the error is logged and wipeIndex() is
/// scheduled through a zero-delay single-shot timer.
void
FuzzyIndex::appendFields( const Tomahawk::IndexData& data )
{
    try
    {
        DocumentPtr document = newLucene<Document>();

        const bool isTrack = !data.track.isEmpty();

        // Neither a track nor an album name: nothing to index.
        if ( !isTrack && data.album.isEmpty() )
            return;

        if ( isTrack )
        {
            // Searchable name fields are indexed but not stored ...
            document->add( newLucene<Field>( L"fulltext", Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString(),
                                             Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );

            document->add( newLucene<Field>( L"track", Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString(),
                                             Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );

            document->add( newLucene<Field>( L"artist", Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString(),
                                             Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );

            // ... while the numeric ids are stored but not indexed.
            document->add( newLucene<Field>( L"artistid", QString::number( data.artistId ).toStdWString(),
                                             Field::STORE_YES, Field::INDEX_NO ) );

            document->add( newLucene<Field>( L"trackid", QString::number( data.id ).toStdWString(),
                                             Field::STORE_YES, Field::INDEX_NO ) );
        }
        else
        {
            document->add( newLucene<Field>( L"album", Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString(),
                                             Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );

            document->add( newLucene<Field>( L"albumid", QString::number( data.id ).toStdWString(),
                                             Field::STORE_YES, Field::INDEX_NO ) );
        }

        m_luceneWriter->addDocument( document );
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << error.what();

        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }
}
Example #28
0
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding.
///
/// The test proceeds in phases:
///  1. Index 2 * numDocs documents with 1-byte payloads, commit, then index numDocs
///     more documents whose payload length grows with the document number.
///  2. Read every payload back and compare the concatenation with the source data.
///  3. Exercise lazy payload loading, skipTo() across skip intervals, and the rule
///     that getPayload() may only be called once per position.
///  4. Re-create the index with a single 1500-byte payload and verify it round-trips.
///
/// @param dir Directory the index is created in; any existing index is overwritten.
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    // Total bytes consumed by both indexing loops below:
    //   first loop : 2 * numDocs documents * numTerms terms * 1 byte
    //   second loop: sum over i in [0, numDocs) of (numTerms terms * i bytes)
    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);

    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    // now add documents whose payload length equals their index in this loop
    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    // Advance all term enumerations in lock step (every document contains every term)
    // and collect the payloads in the same order they were handed to the analyzer.
    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();

        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();
    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();

    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    // this document belongs to the second batch, where the payload length is the
    // document's index within that batch (document number minus 2 * numDocs)
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple call of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
        // Reaching this line means the forbidden second call was accepted; record a
        // non-fatal failure so the remaining checks and cleanup still run.
        ADD_FAILURE() << "Expected IOException from second getPayload() call without nextPosition()";
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    // the expected payload is the 1500-byte slice starting at offset 100 of the source data
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));

    reader->close();
}
 void add(const String& value, IndexWriterPtr iw)
 {
     DocumentPtr d = newLucene<Document>();
     d->add(newLucene<Field>(FIELD_NAME, value, Field::STORE_YES, Field::INDEX_ANALYZED));
     iw->addDocument(d);
 }
 void addDoc(const IndexWriterPtr& writer, const String& value) {
     DocumentPtr doc = newLucene<Document>();
     doc->add(newLucene<Field>(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED));
     writer->addDocument(doc);
 }