void createIndex(const DirectoryPtr& dir, bool multiSegment) {
    IndexWriter::unlock(dir);
    IndexWriterPtr w = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), IndexWriter::MaxFieldLengthLIMITED);
    w->setMergePolicy(newLucene<LogDocMergePolicy>(w));
    for (int32_t i = 0; i < 100; ++i) {
        w->addDocument(createDocument(i, 4));
        if (multiSegment && (i % 10) == 0) {
            w->commit();
        }
    }
    if (!multiSegment) {
        w->optimize();
    }
    w->close();
    IndexReaderPtr r = IndexReader::open(dir, false);
    if (multiSegment) {
        EXPECT_TRUE(r->getSequentialSubReaders().size() > 1);
    } else {
        EXPECT_EQ(r->getSequentialSubReaders().size(), 1);
    }
    r->close();
}
void createIndex(int32_t numHits) {
    int32_t numDocs = 500;

    DirectoryPtr directory = newLucene<SeekCountingDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    writer->setUseCompoundFile(false);
    writer->setMaxBufferedDocs(10);
    for (int32_t i = 0; i < numDocs; ++i) {
        DocumentPtr doc = newLucene<Document>();
        String content;
        if (i % (numDocs / numHits) == 0) {
            // add a document that matches the query "term1 term2"
            content = term1 + L" " + term2;
        } else if (i % 15 == 0) {
            // add a document that only contains term1
            content = term1 + L" " + term1;
        } else {
            // add a document that contains term2 but not term1
            content = term3 + L" " + term2;
        }
        doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
        writer->addDocument(doc);
    }

    // make sure the index has only a single segment
    writer->optimize();
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
    searcher = newLucene<IndexSearcher>(reader);
}
TEST_F(SegmentTermEnumTest, testTermEnum) {
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // add 100 documents with term: aaa
    // add 100 documents with terms: aaa bbb
    // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' has 100
    for (int32_t i = 0; i < 100; ++i) {
        addDoc(writer, L"aaa");
        addDoc(writer, L"aaa bbb");
    }
    writer->close();

    // verify document frequency of terms in an unoptimized index
    verifyDocFreq(dir);

    // merge segments by optimizing the index
    writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), false, IndexWriter::MaxFieldLengthLIMITED);
    writer->optimize();
    writer->close();

    // verify document frequency of terms in an optimized index
    verifyDocFreq(dir);
}
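// The test above relies on an addDoc() helper that is not defined in this section. A minimal
// sketch is given below, assuming each document only needs a single analyzed L"content" field;
// the field name and store/index options are assumptions, not taken from the original helper.
static void addDoc(const IndexWriterPtr& writer, const String& value) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"content", value, Field::STORE_NO, Field::INDEX_ANALYZED));
    writer->addDocument(doc);
}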
static void createIndexNoClose(bool multiSegment, const String& indexName, const IndexWriterPtr& w) {
    for (int32_t i = 0; i < 100; ++i) {
        w->addDocument(createDocument(i, indexName, 4));
    }
    if (!multiSegment) {
        w->optimize();
    }
}
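// createDocument() is likewise assumed rather than defined in this section. A hedged sketch,
// assuming an L"id" key field, an L"indexname" field and numFields generated body fields; the
// exact field names, contents and options are assumptions. The two-argument createDocument(i, 4)
// used in the earlier createIndex() presumably forwards to something like this with a fixed name.
static DocumentPtr createDocument(int32_t n, const String& indexName, int32_t numFields) {
    DocumentPtr doc = newLucene<Document>();
    doc->add(newLucene<Field>(L"id", StringUtils::toString(n), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    doc->add(newLucene<Field>(L"indexname", indexName, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    for (int32_t i = 0; i < numFields; ++i) {
        // each generated field carries a small token derived from the document number
        doc->add(newLucene<Field>(L"field" + StringUtils::toString(i + 1), L"a" + StringUtils::toString(n), Field::STORE_YES, Field::INDEX_ANALYZED));
    }
    return doc;
}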
/// Tests whether the DocumentWriter and SegmentMerger correctly enable the payload bit in the FieldInfo
TEST_F(PayloadsTest, testPayloadFieldBit) {
    DirectoryPtr ram = newLucene<RAMDirectory>();
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    DocumentPtr d = newLucene<Document>();

    // this field won't have any payloads
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));

    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // only add payload data for field f2
    ByteArray someData(ByteArray::newInstance(8));
    uint8_t input[8] = { 's', 'o', 'm', 'e', 'd', 'a', 't', 'a' };
    std::memcpy(someData.get(), input, 8);

    analyzer->setPayloadData(L"f2", 1, someData, 0, 1);

    writer->addDocument(d);

    // flush
    writer->close();

    SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(ram);
    FieldInfosPtr fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(!fi->fieldInfo(L"f3")->storePayloads);
    reader->close();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    writer = newLucene<IndexWriter>(ram, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    d = newLucene<Document>();
    d->add(newLucene<Field>(L"f1", L"This field has no payloads", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f2", L"This field has payloads in all docs", Field::STORE_NO, Field::INDEX_ANALYZED));
    d->add(newLucene<Field>(L"f3", L"This field has payloads in some docs", Field::STORE_NO, Field::INDEX_ANALYZED));

    // add payload data for field f2 and f3
    analyzer->setPayloadData(L"f2", someData, 0, 1);
    analyzer->setPayloadData(L"f3", someData, 0, 3);

    writer->addDocument(d);

    // force merge
    writer->optimize();

    // flush
    writer->close();

    reader = SegmentReader::getOnlySegmentReader(ram);
    fi = reader->fieldInfos();
    EXPECT_TRUE(!fi->fieldInfo(L"f1")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f2")->storePayloads);
    EXPECT_TRUE(fi->fieldInfo(L"f3")->storePayloads);
    reader->close();
}
virtual void run() {
    try {
        for (int32_t j = 0; j < numIter; ++j) {
            writerFinal->optimize(false);
            for (int32_t k = 0; k < 17 * (1 + iFinal); ++k) {
                DocumentPtr d = newLucene<Document>();
                d->add(newLucene<Field>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
                d->add(newLucene<Field>(L"contents", intToEnglish(iFinal + k), Field::STORE_NO, Field::INDEX_ANALYZED));
                writerFinal->addDocument(d);
            }
            for (int32_t k = 0; k < 9 * (1 + iFinal); ++k) {
                writerFinal->deleteDocuments(newLucene<Term>(L"id", StringUtils::toString(iterFinal) + L"_" + StringUtils::toString(iFinal) + L"_" + StringUtils::toString(j) + L"_" + StringUtils::toString(k)));
            }
            writerFinal->optimize();
        }
    } catch (LuceneException& e) {
        FAIL() << "Unexpected exception: " << e.getError();
    }
}
/// One-time setup to initialise static members
void setup() {
    // set the theoretical maximum term count for 8bit (see docs for the number)
    BooleanQuery::setMaxClauseCount(3 * 255 * 2 + 255);

    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthUNLIMITED);

    NumericFieldPtr field8 = newLucene<NumericField>(L"field8", 8, Field::STORE_YES, true);
    NumericFieldPtr field4 = newLucene<NumericField>(L"field4", 4, Field::STORE_YES, true);
    NumericFieldPtr field2 = newLucene<NumericField>(L"field2", 2, Field::STORE_YES, true);
    NumericFieldPtr fieldNoTrie = newLucene<NumericField>(L"field" + StringUtils::toString(INT_MAX), INT_MAX, Field::STORE_YES, true);
    NumericFieldPtr ascfield8 = newLucene<NumericField>(L"ascfield8", 8, Field::STORE_NO, true);
    NumericFieldPtr ascfield4 = newLucene<NumericField>(L"ascfield4", 4, Field::STORE_NO, true);
    NumericFieldPtr ascfield2 = newLucene<NumericField>(L"ascfield2", 2, Field::STORE_NO, true);

    DocumentPtr doc = newLucene<Document>();

    // add fields that have a distance, to test general functionality
    doc->add(field8);
    doc->add(field4);
    doc->add(field2);
    doc->add(fieldNoTrie);

    // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct
    // splitting of range and inclusive/exclusive
    doc->add(ascfield8);
    doc->add(ascfield4);
    doc->add(ascfield2);

    // Add a series of noDocs docs with increasing int values
    for (int32_t l = 0; l < noDocs; ++l) {
        int32_t val = distance * l + startOffset;
        field8->setIntValue(val);
        field4->setIntValue(val);
        field2->setIntValue(val);
        fieldNoTrie->setIntValue(val);

        val = l - (noDocs / 2);
        ascfield8->setIntValue(val);
        ascfield4->setIntValue(val);
        ascfield2->setIntValue(val);
        writer->addDocument(doc);
    }

    writer->optimize();
    writer->close();
    searcher = newLucene<IndexSearcher>(directory, true);
}
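// A hedged usage sketch (not part of the original fixture) showing how the trie-encoded fields
// built in setup() are typically queried. The bounds 100 and 200 are illustrative only; the real
// tests derive their ranges from distance, startOffset and noDocs.
static void exampleRangeSearch(const SearcherPtr& searcher) {
    // match documents whose field8 value lies in [100, 200], using the same precisionStep (8)
    // that the field was indexed with
    QueryPtr query = NumericRangeQuery::newIntRange(L"field8", 8, 100, 200, true, true);
    TopDocsPtr topDocs = searcher->search(query, FilterPtr(), 10);
    // topDocs->totalHits is the number of indexed values falling inside the range
}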
void doBody(int32_t j, Collection<DirectoryPtr> dirs) {
    switch (j % 4) {
    case 0:
        mainWriter->addIndexesNoOptimize(dirs);
        mainWriter->optimize();
        break;
    case 1:
        mainWriter->addIndexesNoOptimize(dirs);
        numAddIndexesNoOptimize->incrementAndGet();
        break;
    case 2:
        mainWriter->addIndexes(readers);
        break;
    case 3:
        mainWriter->commit();
        break;
    }
    count->addAndGet(dirs.size() * NUM_INIT_DOCS);
}
DateSortTest() {
    // Create an index writer.
    directory = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    // oldest doc:
    // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007
    writer->addDocument(createDocument(L"Document 1", 1192001122000LL));
    // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007
    writer->addDocument(createDocument(L"Document 2", 1192001126000LL));
    // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007
    writer->addDocument(createDocument(L"Document 3", 1192101133000LL));
    // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007
    writer->addDocument(createDocument(L"Document 4", 1192104129000LL));
    // latest doc:
    // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007
    writer->addDocument(createDocument(L"Document 5", 1192209943000LL));

    writer->optimize();
    writer->close();
}
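// createDocument(text, time) is assumed by the constructor above. A minimal sketch, assuming each
// document carries an analyzed text field plus a sortable date field encoded with DateTools; the
// field names L"text" and L"date" and the second resolution are assumptions.
static DocumentPtr createDocument(const String& text, int64_t time) {
    DocumentPtr document = newLucene<Document>();
    document->add(newLucene<Field>(L"text", text, Field::STORE_YES, Field::INDEX_ANALYZED));
    // encode the timestamp so that lexicographic order matches chronological order
    String dateTime = DateTools::timeToString(time, DateTools::RESOLUTION_SECOND);
    document->add(newLucene<Field>(L"date", dateTime, Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    return document;
}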
/// Builds an index with payloads in the given Directory and performs different
/// tests to verify the payload encoding
static void encodingTest(const DirectoryPtr& dir) {
    PayloadAnalyzerPtr analyzer = newLucene<PayloadAnalyzer>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);

    // should be in sync with value in TermInfosWriter
    int32_t skipInterval = 16;

    int32_t numTerms = 5;
    String fieldName = L"f1";

    int32_t numDocs = skipInterval + 1;

    // create content for the test documents with just a few terms
    Collection<TermPtr> terms = generateTerms(fieldName, numTerms);
    StringStream sb;
    for (Collection<TermPtr>::iterator term = terms.begin(); term != terms.end(); ++term) {
        sb << (*term)->text() << L" ";
    }
    String content = sb.str();

    int32_t payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    ByteArray payloadData = generateRandomData(payloadDataLength);
    DocumentPtr d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, content, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int32_t offset = 0;
    for (int32_t i = 0; i < 2 * numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer->addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer->commit();

    for (int32_t i = 0; i < numDocs; ++i) {
        analyzer->setPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer->addDocument(d);
    }

    writer->optimize();
    // flush
    writer->close();

    // Verify the index
    IndexReaderPtr reader = IndexReader::open(dir, true);

    ByteArray verifyPayloadData(ByteArray::newInstance(payloadDataLength));
    offset = 0;
    Collection<TermPositionsPtr> tps = Collection<TermPositionsPtr>::newInstance(numTerms);
    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i] = reader->termPositions(terms[i]);
    }

    while (tps[0]->next()) {
        for (int32_t i = 1; i < numTerms; ++i) {
            tps[i]->next();
        }
        int32_t freq = tps[0]->freq();
        for (int32_t i = 0; i < freq; ++i) {
            for (int32_t j = 0; j < numTerms; ++j) {
                tps[j]->nextPosition();
                tps[j]->getPayload(verifyPayloadData, offset);
                offset += tps[j]->getPayloadLength();
            }
        }
    }

    for (int32_t i = 0; i < numTerms; ++i) {
        tps[i]->close();
    }

    EXPECT_TRUE(payloadData.equals(verifyPayloadData));

    // test lazy skipping
    TermPositionsPtr tp = reader->termPositions(terms[0]);
    tp->next();
    tp->nextPosition();
    // now we don't read this payload
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    ByteArray payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[numTerms]);
    tp->nextPosition();
    // we don't read this payload and skip to a different document
    tp->skipTo(5);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    payload = tp->getPayload(ByteArray(), 0);
    EXPECT_EQ(payload[0], payloadData[5 * numTerms]);

    // Test different lengths at skip points
    tp->seek(terms[1]);
    tp->next();
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(2 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(1, tp->getPayloadLength());
    tp->skipTo(3 * skipInterval - 1);
    tp->nextPosition();
    EXPECT_EQ(3 * skipInterval - 2 * numDocs - 1, tp->getPayloadLength());

    // Test multiple calls of getPayload()
    tp->getPayload(ByteArray(), 0);

    // it is forbidden to call getPayload() more than once without calling nextPosition()
    try {
        tp->getPayload(ByteArray(), 0);
    } catch (IOException& e) {
        EXPECT_TRUE(check_exception(LuceneException::IO)(e));
    }

    reader->close();

    // test long payload
    analyzer = newLucene<PayloadAnalyzer>();
    writer = newLucene<IndexWriter>(dir, analyzer, true, IndexWriter::MaxFieldLengthLIMITED);
    String singleTerm = L"lucene";

    d = newLucene<Document>();
    d->add(newLucene<Field>(fieldName, singleTerm, Field::STORE_NO, Field::INDEX_ANALYZED));

    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = generateRandomData(2000);
    analyzer->setPayloadData(fieldName, payloadData, 100, 1500);
    writer->addDocument(d);

    writer->optimize();
    // flush
    writer->close();

    reader = IndexReader::open(dir, true);
    tp = reader->termPositions(newLucene<Term>(fieldName, singleTerm));
    tp->next();
    tp->nextPosition();

    verifyPayloadData.resize(tp->getPayloadLength());
    tp->getPayload(verifyPayloadData, 0);
    ByteArray portion(ByteArray::newInstance(1500));
    MiscUtils::arrayCopy(payloadData.get(), 100, portion.get(), 0, 1500);

    EXPECT_TRUE(portion.equals(verifyPayloadData));
    reader->close();
}
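// generateTerms() and generateRandomData() are assumed by encodingTest() but not shown in this
// section. Hedged sketches of both follow; the exact term spelling and random source are
// assumptions, only the shapes (n distinct terms for one field, n payload bytes) matter here.
static Collection<TermPtr> generateTerms(const String& fieldName, int32_t n) {
    Collection<TermPtr> terms = Collection<TermPtr>::newInstance(n);
    for (int32_t i = 0; i < n; ++i) {
        terms[i] = newLucene<Term>(fieldName, L"t" + StringUtils::toString(i));
    }
    return terms;
}

static ByteArray generateRandomData(int32_t n) {
    ByteArray data(ByteArray::newInstance(n));
    RandomPtr random = newLucene<Random>();
    for (int32_t i = 0; i < n; ++i) {
        data[i] = (uint8_t)random->nextInt(256);
    }
    return data;
}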
void checkSkipTo(int32_t indexDivisor) {
    DirectoryPtr dir = newLucene<RAMDirectory>();
    IndexWriterPtr writer = newLucene<IndexWriter>(dir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    TermPtr ta = newLucene<Term>(L"content", L"aaa");
    for (int32_t i = 0; i < 10; ++i) {
        addDoc(writer, L"aaa aaa aaa aaa");
    }

    TermPtr tb = newLucene<Term>(L"content", L"bbb");
    for (int32_t i = 0; i < 16; ++i) {
        addDoc(writer, L"bbb bbb bbb bbb");
    }

    TermPtr tc = newLucene<Term>(L"content", L"ccc");
    for (int32_t i = 0; i < 50; ++i) {
        addDoc(writer, L"ccc ccc ccc ccc");
    }

    // assure that we deal with a single segment
    writer->optimize();
    writer->close();

    IndexReaderPtr reader = IndexReader::open(dir, IndexDeletionPolicyPtr(), true, indexDivisor);
    TermDocsPtr tdocs = reader->termDocs();

    // without optimization (assumption skipInterval == 16)

    // with next
    tdocs->seek(ta);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(0, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(1, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(0));
    EXPECT_EQ(2, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(4));
    EXPECT_EQ(4, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(9));
    EXPECT_EQ(9, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(10));

    // without next
    tdocs->seek(ta);
    EXPECT_TRUE(tdocs->skipTo(0));
    EXPECT_EQ(0, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(4));
    EXPECT_EQ(4, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(9));
    EXPECT_EQ(9, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(10));

    // exactly skipInterval documents and therefore with optimization

    // with next
    tdocs->seek(tb);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(10, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(11, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(12, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(15));
    EXPECT_EQ(15, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(24));
    EXPECT_EQ(24, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(25));
    EXPECT_EQ(25, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(26));

    // without next
    tdocs->seek(tb);
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(10, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(15));
    EXPECT_EQ(15, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(24));
    EXPECT_EQ(24, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(25));
    EXPECT_EQ(25, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(26));

    // much more than skipInterval documents and therefore with optimization

    // with next
    tdocs->seek(tc);
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(26, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->next());
    EXPECT_EQ(27, tdocs->doc());
    EXPECT_EQ(4, tdocs->freq());
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(28, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(40));
    EXPECT_EQ(40, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(57));
    EXPECT_EQ(57, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(74));
    EXPECT_EQ(74, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(75));
    EXPECT_EQ(75, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(76));

    // without next
    tdocs->seek(tc);
    EXPECT_TRUE(tdocs->skipTo(5));
    EXPECT_EQ(26, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(40));
    EXPECT_EQ(40, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(57));
    EXPECT_EQ(57, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(74));
    EXPECT_EQ(74, tdocs->doc());
    EXPECT_TRUE(tdocs->skipTo(75));
    EXPECT_EQ(75, tdocs->doc());
    EXPECT_TRUE(!tdocs->skipTo(76));

    tdocs->close();
    reader->close();
    dir->close();
}
TEST_F(SimpleExplanationsOfNonMatchesTest, testTermQueryMultiSearcherExplain) {
    // creating two directories for indices
    DirectoryPtr indexStoreA = newLucene<MockRAMDirectory>();
    DirectoryPtr indexStoreB = newLucene<MockRAMDirectory>();

    DocumentPtr lDoc = newLucene<Document>();
    lDoc->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));
    DocumentPtr lDoc2 = newLucene<Document>();
    lDoc2->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));
    DocumentPtr lDoc3 = newLucene<Document>();
    lDoc3->add(newLucene<Field>(L"handle", L"1 2", Field::STORE_YES, Field::INDEX_ANALYZED));

    IndexWriterPtr writerA = newLucene<IndexWriter>(indexStoreA, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
    IndexWriterPtr writerB = newLucene<IndexWriter>(indexStoreB, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);

    writerA->addDocument(lDoc);
    writerA->addDocument(lDoc2);
    writerA->optimize();
    writerA->close();

    writerB->addDocument(lDoc3);
    writerB->close();

    QueryParserPtr parser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, L"fulltext", newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT));
    QueryPtr query = parser->parse(L"handle:1");

    Collection<SearchablePtr> searchers = newCollection<SearchablePtr>(
        newLucene<IndexSearcher>(indexStoreB, true),
        newLucene<IndexSearcher>(indexStoreA, true)
    );
    SearcherPtr mSearcher = newLucene<MultiSearcher>(searchers);
    Collection<ScoreDocPtr> hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    ExplanationPtr explain = mSearcher->explain(query, hits[0]->doc);
    String exp = explain->toString();
    EXPECT_TRUE(exp.find(L"maxDocs=3") != String::npos);
    EXPECT_TRUE(exp.find(L"docFreq=3") != String::npos);

    query = parser->parse(L"handle:\"1 2\"");
    hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    explain = mSearcher->explain(query, hits[0]->doc);
    exp = explain->toString();
    EXPECT_TRUE(exp.find(L"1=3") != String::npos);
    EXPECT_TRUE(exp.find(L"2=3") != String::npos);

    query = newLucene<SpanNearQuery>(newCollection<SpanQueryPtr>(newLucene<SpanTermQuery>(newLucene<Term>(L"handle", L"1")), newLucene<SpanTermQuery>(newLucene<Term>(L"handle", L"2"))), 0, true);
    hits = mSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(3, hits.size());

    explain = mSearcher->explain(query, hits[0]->doc);
    exp = explain->toString();
    EXPECT_TRUE(exp.find(L"1=3") != String::npos);
    EXPECT_TRUE(exp.find(L"2=3") != String::npos);

    mSearcher->close();
}
/// Test that norms values are preserved as the index is maintained, including separate norms,
/// merging indexes with separate norms, and optimize.
TEST_F(IndexReaderCloneNormsTest, testNorms) {
    // test with a single index: index1
    String indexDir1(FileUtils::joinPath(getTempDir(), L"lucenetestindex1"));
    DirectoryPtr dir1 = FSDirectory::open(indexDir1);
    IndexWriter::unlock(dir1);

    norms = Collection<double>::newInstance();
    modifiedNorms = Collection<double>::newInstance();

    createIndex(dir1);
    doTestNorms(dir1);

    // test with a single index: index2
    Collection<double> norms1 = norms;
    Collection<double> modifiedNorms1 = modifiedNorms;
    int32_t numDocNorms1 = numDocNorms;

    norms = Collection<double>::newInstance();
    modifiedNorms = Collection<double>::newInstance();
    numDocNorms = 0;

    String indexDir2(FileUtils::joinPath(getTempDir(), L"lucenetestindex2"));
    DirectoryPtr dir2 = FSDirectory::open(indexDir2);

    createIndex(dir2);
    doTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    String indexDir3(FileUtils::joinPath(getTempDir(), L"lucenetestindex3"));
    DirectoryPtr dir3 = FSDirectory::open(indexDir3);

    createIndex(dir3);
    IndexWriterPtr iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->addIndexesNoOptimize(newCollection<DirectoryPtr>(dir1, dir2));
    iw->optimize();
    iw->close();

    norms1.addAll(norms.begin(), norms.end());
    norms = norms1;
    modifiedNorms1.addAll(modifiedNorms.begin(), modifiedNorms.end());
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    verifyIndex(dir3);
    doTestNorms(dir3);

    // now with optimize
    iw = newLucene<IndexWriter>(dir3, anlzr, false, IndexWriter::MaxFieldLengthLIMITED);
    iw->setMaxBufferedDocs(5);
    iw->setMergeFactor(3);
    iw->optimize();
    iw->close();
    verifyIndex(dir3);

    dir1->close();
    dir2->close();
    dir3->close();
}