// Indexes four documents whose "category" field holds "1".."4" and checks that
// a query requiring ("1" OR "2") while excluding the unindexed value "9"
// yields two hits.
TEST_F(BooleanScorerTest, testMethod) {
    static const String FIELD = L"category";

    RAMDirectoryPtr ramDir = newLucene<RAMDirectory>();
    Collection<String> categories = newCollection<String>(L"1", L"2", L"3", L"4");

    // One document per category value, indexed as a single un-analyzed term.
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
    for (int32_t idx = 0; idx < categories.size(); ++idx) {
        DocumentPtr categoryDoc = newLucene<Document>();
        categoryDoc->add(newLucene<Field>(FIELD, categories[idx], Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        indexWriter->addDocument(categoryDoc);
    }
    indexWriter->close();

    // Inner disjunction: category "1" or category "2".
    BooleanQueryPtr oneOrTwo = newLucene<BooleanQuery>();
    oneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"1")), BooleanClause::SHOULD);
    oneOrTwo->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"2")), BooleanClause::SHOULD);

    // Outer query: the disjunction is required, category "9" is prohibited.
    BooleanQueryPtr topLevel = newLucene<BooleanQuery>();
    topLevel->add(oneOrTwo, BooleanClause::MUST);
    topLevel->add(newLucene<TermQuery>(newLucene<Term>(FIELD, L"9")), BooleanClause::MUST_NOT);

    IndexSearcherPtr searcher = newLucene<IndexSearcher>(ramDir, true);
    Collection<ScoreDocPtr> hits = searcher->search(topLevel, FilterPtr(), 1000)->scoreDocs;
    EXPECT_EQ(2, hits.size());
}
示例#2
0
// ** Films::filmIdToObjectId
// Finds the record of the "items" collection whose "itemId" equals filmId and
// returns that record's _id. The record is asserted to exist.
OID Films::filmIdToObjectId( int filmId ) const
{
    DocumentPtr item = m_source->collection( "items" )->findOne( QUERY( "itemId" << filmId ) );
    assert( item != NULL );

    return item->_id();
}
示例#3
0
// ** Films::similarTo
// Returns the films recorded as similar to the film identified by oid.
//
// Every record of the "similar" collection that references the film in its
// "first" or "second" field contributes one entry, built from the film on the
// other side of the pair together with the decoded similarity, accuracy and
// the quality bucket derived from m_similarityQuartiles. Entries are ordered
// with SimilarFilm::sortBySimilarity.
//
// A positive count truncates the result to at most count entries; a count of
// zero or less returns every entry.
SimilarFilmsArray Films::similarTo( const std::string& oid, int count ) const
{
    SimilarFilmsArray result;

    OID           objectId = OID( oid );
    CollectionPtr similars = m_target->collection( "similar" );

    // Both fields are read back as object ids below, so both branches of the
    // $or are matched against the object id rather than the raw string.
    CursorPtr     cursor   = similars->find( QUERY( "$or" << ARRAY( DOCUMENT( "first" << objectId ) << DOCUMENT( "second" << objectId ) ) ) );
    DocumentPtr   document;

    while( (document = cursor->next()) ) {
        // ** Read data from document
        OID first  = document->objectId( "first" );
        OID second = document->objectId( "second" );
        int value  = document->integer( "value" );

        // ** Decode similarity & accuracy
        float similarity, accuracy;
        decodeSimilarity( value, similarity, accuracy );

        // ** Push similar film (the one on the opposite side of the pair)
        Film    film    = filmById( first == objectId ? second : first );
        Quality quality = qualityFromRange( similarity, m_similarityQuartiles );
        result.push_back( SimilarFilm( film, similarity, accuracy, quality ) );
    }

    std::sort( result.begin(), result.end(), SimilarFilm::sortBySimilarity );

    // Truncate only for a positive limit: a negative count would be converted
    // to an enormous unsigned size by resize().
    if( count > 0 ) {
        result.resize( std::min( count, ( int )result.size() ) );
    }

    return result;
}
示例#4
0
// ** Films::filmFromDocument
// Builds a Film from a stored document: its _id, the "name.ru" string, the
// "genres" integer set and the "year" integer go to the constructor, and the
// "video" string is stored in m_video.
Film Films::filmFromDocument( const DocumentPtr& document ) const
{
    Film result( document->_id(),
                 document->string( "name.ru" ),
                 document->integerSet( "genres" ),
                 document->integer( "year" ) );

    result.m_video = document->string( "video" );
    return result;
}
void StandardDocumentProcessorTestCase::testProcessWithEmptyField()
{
    String sPath = writeTestFile("file_with_empty_field.txt", TEST_FILE_WITH_EMPTY_FIELD);

    StandardDocumentProcessor processor;
    processor.init(m_pDocSchema.get());
    
    DocumentSource docSource(m_pDocSchema.get());
    RawDocumentPtr pRawDoc = new RawDocument();
    pRawDoc->setPath(sPath);
    docSource.setRawDocument(pRawDoc);

    processor.process(docSource);

    DocumentPtr pDoc = docSource.stealLastDocument();
    CPPUNIT_ASSERT(pDoc.isNotNull());

    Answer ans;
    makeAnswer(TEST_FILE_WITH_EMPTY_FIELD, ans);

    Document::Iterator it = pDoc->iterator();
    CPPUNIT_ASSERT_EQUAL(ans.size(), it.size());
    size_t i = 0;
    while (it.hasNext())
    {
        const Field* pField = it.next();
//        cout << ans[i].first << " : " << ans[i].second << endl;
        CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName());
        CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str()));
        ++i;
    }
}
// Runs the processor repeatedly over a test file written from TEST_FILE2,
// for as long as the source reports toBeContinued(), and checks that the
// field name/value pairs of all produced documents, taken in order, match the
// answer that makeAnswer() derives from the same content.
void StandardDocumentProcessorTestCase::testProcessMultiFile()
{
    string sPath = writeTestFile("file2.txt", TEST_FILE2);

    StandardDocumentProcessor processor;
    processor.init(m_pDocSchema.get());

    DocumentSource docSource(m_pDocSchema.get());

    Answer ans;
    makeAnswer(TEST_FILE2, ans);
    RawDocumentPtr pRawDoc = new RawDocument();
    pRawDoc->setPath(sPath);
    docSource.setRawDocument(pRawDoc);

    // i counts the fields seen across all documents.
    size_t i = 0;
    do
    {
        processor.process(docSource);

        DocumentPtr pDoc = docSource.stealLastDocument();
        CPPUNIT_ASSERT(pDoc.isNotNull());

        Document::Iterator it = pDoc->iterator();
        while (it.hasNext())
        {
            const Field* pField = it.next();
            // Fail cleanly instead of indexing past the end of the answer
            // when more fields are produced than expected.
            CPPUNIT_ASSERT(i < ans.size());
            CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName());
            CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str()));
            ++i;
        }
    } while(docSource.toBeContinued());

    // Every expected pair must have been consumed.
    CPPUNIT_ASSERT_EQUAL(ans.size(), i);
}
示例#7
0
      //---------------------------------------------------------------------
      // Returns the "profile" element obtained by passing the encrypted
      // private profile text through decryptAndNulTerminateFromBase64 (with
      // the contact profile secret and the stored salt) and parsing the
      // result. The element is orphaned from the temporary document before
      // being returned.
      //
      // An empty ElementPtr is returned when there is no document, no contact
      // profile secret, no contact profile element, or when a
      // zsLib::XML::Exceptions::CheckFailed is raised inside the try block.
      ElementPtr PeerContactProfile::getPrivateProfile() const
      {
        AutoRecursiveLock lock(mLock);
        if ((!mDocument) || (!hasContactProfileSecret())) return ElementPtr();

        try {
          ElementPtr contactProfileEl = getContactProfileElement();
          if (!contactProfileEl) return ElementPtr();

          ElementPtr privateEl = contactProfileEl->findFirstChildElementChecked("private");
          ElementPtr encryptedProfileEl = privateEl->findFirstChildElementChecked("encryptedProfile");
          ElementPtr saltEl = privateEl->findFirstChildElementChecked("salt");

          String saltAsBase64 = saltEl->getText(true);

          SecureByteBlock decrypted;
          decryptAndNulTerminateFromBase64("profile", mContactProfileSecret, saltAsBase64, encryptedProfileEl->getText(true), decrypted);

          // The decrypted buffer is treated as a C string and parsed as XML.
          String xmlText = (CSTR)((const BYTE *)decrypted);
          DocumentPtr parsedDoc = Document::create();
          parsedDoc->parse(xmlText);

          ElementPtr profileEl = parsedDoc->findFirstChildElementChecked("profile");
          profileEl->orphan();
          return profileEl;
        } catch (zsLib::XML::Exceptions::CheckFailed &) {
          // fall through to the empty result below
        }
        return ElementPtr();
      }
示例#8
0
/**
 * Replace the layer's style table with poStyleTable.
 *
 * The layer stores the pointer and deletes the previously stored table, so
 * the caller hands ownership over. Nothing happens when the layer is not in
 * update mode or has no KML layer.
 *
 * When the underlying KML container is a Document, all of its existing style
 * selectors are removed and the new table is written into it with
 * styletable2kml(). The layer and its datasource are then marked as updated.
 *
 * @param poStyleTable the new style table.
 */
void OGRLIBKMLLayer::SetStyleTableDirectly( OGRStyleTable * poStyleTable )
{
    if( !bUpdate || m_poKmlLayer == NULL )
        return;

    KmlFactory *poKmlFactory = m_poOgrDS->GetKmlFactory();

    // Release the previous table, unless the caller passed the table that is
    // already owned (deleting it would leave a dangling pointer behind).
    if( m_poStyleTable != poStyleTable )
        delete m_poStyleTable;

    m_poStyleTable = poStyleTable;

    if( m_poKmlLayer->IsA( kmldom::Type_Document ) )
    {
        /***** delete all the styles *****/
        DocumentPtr poKmlDocument = AsDocument( m_poKmlLayer );

        // The loop deletes style selectors, so it must be bounded by the
        // number of style selectors (not by the number of schemas).
        const int nKmlStyles =
            static_cast<int>(poKmlDocument->get_styleselector_array_size());

        // Delete from the back so the remaining indices stay valid.
        for( int iKmlStyle = nKmlStyles - 1; iKmlStyle >= 0; iKmlStyle-- )
        {
            poKmlDocument->DeleteStyleSelectorAt( iKmlStyle );
        }

        /***** add the new style table to the document *****/
        styletable2kml( poStyleTable, poKmlFactory,
                        AsContainer( poKmlDocument ) );
    }

    /***** mark the layer as updated *****/
    bUpdated = true;
    m_poOgrDS->Updated();
}
    FieldCacheSanityCheckerTestFixture()
    {
        RAMDirectoryPtr dirA = newLucene<RAMDirectory>();
        RAMDirectoryPtr dirB = newLucene<RAMDirectory>();

        IndexWriterPtr wA = newLucene<IndexWriter>(dirA, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        IndexWriterPtr wB = newLucene<IndexWriter>(dirB, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        int64_t theLong = LLONG_MAX;
        double theDouble = DBL_MAX;
        uint8_t theByte = UCHAR_MAX;
        int32_t theInt = INT_MAX;
        for (int32_t i = 0; i < NUM_DOCS; ++i)
        {
            DocumentPtr doc = newLucene<Document>();
            doc->add(newLucene<Field>(L"theLong", StringUtils::toString(theLong--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theDouble", StringUtils::toString(theDouble--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theByte", StringUtils::toString(theByte--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            doc->add(newLucene<Field>(L"theInt", StringUtils::toString(theInt--), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
            if (i % 3 == 0)
                wA->addDocument(doc);
            else
                wB->addDocument(doc);
        }
        wA->close();
        wB->close();
        readerA = IndexReader::open(dirA, true);
        readerB = IndexReader::open(dirB, true);
        readerX = newLucene<MultiReader>(newCollection<IndexReaderPtr>(readerA, readerB));
    }
        //---------------------------------------------------------------------
        // Builds the request document. Under the root element it adds:
        //  - a "nonce" element holding a value from IHelper::randomString(32);
        //  - the lockbox info element (access token and, when an access
        //    secret is set, an expiry time and an HMAC proof derived from the
        //    secret), if that info has data;
        //  - a "namespaces" element with one child per entry of
        //    mNamespaceInfos, if there is at least one child.
        // The access secret itself is not copied into the outgoing info.
        DocumentPtr LockboxContentGetRequest::encode()
        {
          DocumentPtr doc = IMessageHelper::createDocumentWithRoot(*this);
          ElementPtr rootEl = doc->getFirstChildElement();

          String nonce = IHelper::randomString(32);

          LockboxInfo outgoing;
          outgoing.mAccessToken = mLockboxInfo.mAccessToken;

          if (mLockboxInfo.mAccessSecret.hasData()) {
            outgoing.mAccessSecretProofExpires = zsLib::now() + Seconds(OPENPEER_STACK_MESSAGE_LOCKBOX_CONTENT_GET_REQUEST_EXPIRES_TIME_IN_SECONDS);
            // proof = hex(hmac(key(secret), "<purpose>:<nonce>:<expires>:<token>:<request>"))
            outgoing.mAccessSecretProof = IHelper::convertToHex(
              *IHelper::hmac(
                *IHelper::hmacKeyFromPassphrase(mLockboxInfo.mAccessSecret),
                "lockbox-access-validate:" + nonce + ":" + IHelper::timeToString(outgoing.mAccessSecretProofExpires) + ":" + outgoing.mAccessToken + ":lockbox-content-get"
              )
            );
          }

          rootEl->adoptAsLastChild(IMessageHelper::createElementWithText("nonce", nonce));
          if (outgoing.hasData()) {
            rootEl->adoptAsLastChild(MessageHelper::createElement(outgoing));
          }

          ElementPtr allNamespacesEl = IMessageHelper::createElement("namespaces");
          for (NamespaceInfoMap::iterator entry = mNamespaceInfos.begin(); entry != mNamespaceInfos.end(); ++entry)
          {
            const NamespaceInfo &info = (*entry).second;
            allNamespacesEl->adoptAsLastChild(MessageHelper::createElement(info));
          }

          // An empty "namespaces" element is left out of the request.
          if (allNamespacesEl->hasChildren()) {
            rootEl->adoptAsLastChild(allNamespacesEl);
          }

          return doc;
        }
// Runs the TREC processor repeatedly over "<test path>/1.gz", for as long as
// the source reports toBeContinued(), and checks that the field name/value
// pairs of all produced documents, taken in order, match the answer that
// makeAnswer() derives from TEST_FILE2.
void TrecDocumentProcessorTestCase::testProcessGZipFile()
{
    TrecDocumentProcessor processor;
    processor.init(m_pDocSchema.get(), m_pDocTemp.get());

    DocumentSource docSource(m_pDocSchema.get());
    RawDocumentPtr pRawDoc = new RawDocument();
    pRawDoc->setPath(getTestPath() + "/1.gz");
    docSource.setRawDocument(pRawDoc);

    Answer ans;
    makeAnswer(TEST_FILE2, ans);

    // i counts the fields seen across all documents.
    size_t i = 0;
    do
    {
        processor.process(docSource);

        DocumentPtr pDoc = docSource.stealLastDocument();
        CPPUNIT_ASSERT(pDoc);

        Document::Iterator it = pDoc->iterator();
        while (it.hasNext())
        {
            const Field* pField = it.next();
            // Fail cleanly instead of indexing past the end of the answer
            // when more fields are produced than expected.
            CPPUNIT_ASSERT(i < ans.size());
            CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName());
            CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str()));
            ++i;
        }
    } while(docSource.toBeContinued());

    // Every expected pair must have been consumed.
    CPPUNIT_ASSERT_EQUAL(ans.size(), i);
}
    /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it.
    void checkAgainstRAMDirectory() {
        StringStream fooField;
        StringStream termField;

        // add up to 250 terms to field "foo"
        int32_t fieldCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < fieldCount; ++i) {
            fooField << L" " << randomTerm();
        }

        // add up to 250 terms to field "foo"
        int32_t termCount = random->nextInt(250) + 1;
        for (int32_t i = 0; i < termCount; ++i) {
            termField << L" " << randomTerm();
        }

        RAMDirectoryPtr ramdir = newLucene<RAMDirectory>();
        AnalyzerPtr analyzer = randomAnalyzer();
        IndexWriterPtr writer = newLucene<IndexWriter>(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED);
        DocumentPtr doc = newLucene<Document>();
        FieldPtr field1 = newLucene<Field>(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        FieldPtr field2 = newLucene<Field>(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED);
        doc->add(field1);
        doc->add(field2);
        writer->addDocument(doc);
        writer->close();

        MemoryIndexPtr memory = newLucene<MemoryIndex>();
        memory->addField(L"foo", fooField.str(), analyzer);
        memory->addField(L"term", termField.str(), analyzer);
        checkAllQueries(memory, ramdir, analyzer);
    }
void TrecDocumentProcessorTestCase::testProcessFileMisField()
{
    string sPath = writeTestFile("trec_file3.txt", TEST_FILE_MISS_FIELD);

    TrecDocumentProcessor processor;
    processor.init(m_pDocSchema.get(), m_pDocTemp.get());
    
    DocumentSource docSource(m_pDocSchema.get());
    RawDocumentPtr pRawDoc = new RawDocument();
    pRawDoc->setPath(sPath);
    docSource.setRawDocument(pRawDoc);

    processor.process(docSource);

    DocumentPtr pDoc = docSource.stealLastDocument();
    CPPUNIT_ASSERT(pDoc);

    Answer ans;
    makeAnswer(TEST_FILE_MISS_FIELD, ans);

    Document::Iterator it = pDoc->iterator();
    CPPUNIT_ASSERT_EQUAL(ans.size(), it.size());
    size_t i = 0;
    while (it.hasNext())
    {
        const Field* pField = it.next();
//        cout << ans[i].first << " : " << ans[i].second << endl;
        CPPUNIT_ASSERT_EQUAL(ans[i].first, pField->getFieldSchema()->getName());
        CPPUNIT_ASSERT_EQUAL(ans[i].second, std::string(pField->getValue().c_str()));
        ++i;
    }
}
示例#14
0
void DateTimeIndexTestCase::buildDateTimeIndex(const string& sDocs)
{
    try
    {
        DocumentSchema schema;
        schema.addField("DateTime1", "DATETIME_I", true);

        Index index;
        index.open(getIndexPath(), Index::WRITE, &schema);
        IndexWriterPtr pIndexWriter = index.acquireWriter();
        
        StringTokenizer st(sDocs, ";", StringTokenizer::TOKEN_TRIM |
                           StringTokenizer::TOKEN_IGNORE_EMPTY);
        for (StringTokenizer::Iterator it = st.begin(); it != st.end(); ++it)
        {
            DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
            pDoc->addField(0, (*it).c_str());
            pIndexWriter->addDocument(pDoc);
        }

        pIndexWriter->close();
    }
    catch (const FirteXException& )
    {
        CPPUNIT_ASSERT(false);
    }
}
示例#15
0
    // Adds one document to the index for every non-empty file yielded by the
    // DirectoryIterator over sDir: field 0 receives the file path and field 1
    // the file content. Entries that are not files, files that cannot be
    // opened and empty files are skipped. The writer is closed at the end.
    void index(const tstring& sDir)
    {
        IndexWriterPtr pIndexWriter = m_pIndex->acquireWriter();

        DirectoryIterator di(sDir, false);
        while(di.hasNext())
        {
            const File& f = di.next();
            if(!f.isFile())
                continue;

            BinaryFile bf;
            bf.open(f.getPath().c_str(), BinaryFile::READ);
            if(!bf.isFileOpen())
                continue;

            size_t nRead = (size_t)bf.getLength();
            if (nRead == 0)
                continue;

            DocumentPtr pDoc = new Document(pIndexWriter->getDocSchema());
            pDoc->addField(0, f.getPath().c_str());

            // One extra byte so the content can be NUL-terminated.
            char* buf = new char[nRead + 1];
            try
            {
                bf.read(buf, nRead);
                buf[nRead] = 0;
                pDoc->addField(1, buf, nRead, false);
            }
            catch (...)
            {
                // Do not leak the buffer when reading or adding the field fails.
                delete[] buf;
                throw;
            }
            delete[] buf;

            pIndexWriter->addDocument(pDoc);
        }
        docPool.commit();
        pIndexWriter->close();
    }
        //---------------------------------------------------------------------
        // Builds the request document. For every entry of mProviders a
        // "provider" element is created holding its non-empty "base",
        // "separator" and "identities" values; providers with at least one
        // value are collected under a "providers" element, which is added to
        // the root only when it has children.
        DocumentPtr IdentityLookupRequest::encode()
        {
          DocumentPtr ret = IMessageHelper::createDocumentWithRoot(*this);
          ElementPtr root = ret->getFirstChildElement();

          ElementPtr providersEl = Element::create("providers");

          for (ProviderList::iterator iter = mProviders.begin(); iter != mProviders.end(); ++iter)
          {
            Provider &info = (*iter);
            ElementPtr providerEl = Element::create("provider");

            if (!info.mBase.isEmpty()) {
              providerEl->adoptAsLastChild(IMessageHelper::createElementWithTextAndJSONEncode("base", info.mBase));
            }
            if (!info.mSeparator.isEmpty()) {
              providerEl->adoptAsLastChild(IMessageHelper::createElementWithTextAndJSONEncode("separator", info.mSeparator));
            }
            if (!info.mIdentities.isEmpty()) {
              providerEl->adoptAsLastChild(IMessageHelper::createElementWithTextAndJSONEncode("identities", info.mIdentities));
            }

            // The element was just created, so testing the pointer is always
            // true; skip providers that ended up with no content instead,
            // mirroring the hasChildren() check on "providers" below.
            if (providerEl->hasChildren()) {
              providersEl->adoptAsLastChild(providerEl);
            }
          }

          if (providersEl->hasChildren()) {
            root->adoptAsLastChild(providersEl);
          }

          return ret;
        }
// Indexes ten documents containing "a b" and checks that, after seeking a
// TermPositions enumerator to "b" and then to "a", each seek walks documents
// 0..9 in order with the term at position 1 and 0 respectively.
TEST_F(LazyProxSkippingTest, testSeek) {
    DirectoryPtr ramDir = newLucene<RAMDirectory>();
    IndexWriterPtr indexWriter = newLucene<IndexWriter>(ramDir, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

    int32_t docNum = 0;
    while (docNum < 10) {
        DocumentPtr twoTermDoc = newLucene<Document>();
        twoTermDoc->add(newLucene<Field>(field, L"a b", Field::STORE_YES, Field::INDEX_ANALYZED));
        indexWriter->addDocument(twoTermDoc);
        ++docNum;
    }
    indexWriter->close();

    IndexReaderPtr indexReader = IndexReader::open(ramDir, true);
    TermPositionsPtr tp = indexReader->termPositions();

    // "b" is the second token of every document
    tp->seek(newLucene<Term>(field, L"b"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 1);
    }

    // "a" is the first token of every document
    tp->seek(newLucene<Term>(field, L"a"));
    for (int32_t i = 0; i < 10; ++i) {
        tp->next();
        EXPECT_EQ(tp->doc(), i);
        EXPECT_EQ(tp->nextPosition(), 0);
    }
}
        //---------------------------------------------------------------------
        // Builds the request document. Under the root element it adds a
        // "nonce" element holding a value from IHelper::randomString(32) and,
        // if it has data, the identity info element: URI, provider, access
        // token and, when an access secret is set, an expiry time plus an
        // HMAC proof derived from the secret. The access secret itself is not
        // copied into the outgoing info.
        DocumentPtr IdentityAccessRolodexCredentialsGetRequest::encode()
        {
          DocumentPtr doc = IMessageHelper::createDocumentWithRoot(*this);
          ElementPtr rootEl = doc->getFirstChildElement();

          String nonce = IHelper::randomString(32);

          IdentityInfo outgoing;
          outgoing.mURI = mIdentityInfo.mURI;
          outgoing.mProvider = mIdentityInfo.mProvider;
          outgoing.mAccessToken = mIdentityInfo.mAccessToken;

          if (mIdentityInfo.mAccessSecret.hasData()) {
            outgoing.mAccessSecretProofExpires = zsLib::now() + Seconds(OPENPEER_STACK_MESSAGE_IDENTITY_ACCESS_LOCKBOX_UPDATE_EXPIRES_TIME_IN_SECONDS);
            // proof = hex(hmac(key(secret), "<purpose>:<uri>:<nonce>:<expires>:<token>:<request>"))
            outgoing.mAccessSecretProof = IHelper::convertToHex(
              *IHelper::hmac(
                *IHelper::hmacKeyFromPassphrase(mIdentityInfo.mAccessSecret),
                "identity-access-validate:" + outgoing.mURI + ":" + nonce + ":" + IHelper::timeToString(outgoing.mAccessSecretProofExpires) + ":" + outgoing.mAccessToken + ":rolodex-credentials-get"
              )
            );
          }

          rootEl->adoptAsLastChild(IMessageHelper::createElementWithText("nonce", nonce));
          if (outgoing.hasData()) {
            rootEl->adoptAsLastChild(MessageHelper::createElement(outgoing));
          }

          return doc;
        }
示例#19
0
// Searches the fixture index for "Document", sorted on DATE_TIME_FIELD as a
// string in reverse order, and checks that the TEXT_FIELD values of the hits
// come back as "Document 5" down to "Document 1".
TEST_F(DateSortTest, testReverseDateSort) {
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);

    SortPtr sort = newLucene<Sort>(newLucene<SortField>(DATE_TIME_FIELD, SortField::STRING, true));

    QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, newLucene<WhitespaceAnalyzer>());
    QueryPtr query = queryParser->parse(L"Document");

    // Execute the search and process the search results.
    Collection<String> actualOrder = Collection<String>::newInstance(5);
    Collection<ScoreDocPtr>hits = searcher->search(query, FilterPtr(), 1000, sort)->scoreDocs;

    // actualOrder has a fixed number of slots; stop here rather than write
    // past its end if the search returns more hits than expected.
    ASSERT_LE(hits.size(), actualOrder.size());

    for (int32_t i = 0; i < hits.size(); ++i) {
        DocumentPtr document = searcher->doc(hits[i]->doc);
        String text = document->get(TEXT_FIELD);
        actualOrder[i] = text;
    }
    searcher->close();

    // Set up the expected order (ie. Document 5, 4, 3, 2, 1).
    Collection<String> expectedOrder = Collection<String>::newInstance(5);
    expectedOrder[0] = L"Document 5";
    expectedOrder[1] = L"Document 4";
    expectedOrder[2] = L"Document 3";
    expectedOrder[3] = L"Document 2";
    expectedOrder[4] = L"Document 1";

    EXPECT_TRUE(expectedOrder.equals(actualOrder));
}
    ParallelTermEnumTestFixture()
    {
        RAMDirectoryPtr rd1 = newLucene<RAMDirectory>();
        IndexWriterPtr iw1 = newLucene<IndexWriter>(rd1, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        DocumentPtr doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"field1", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field2", L"the quick brown fox jumps", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field4", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
        iw1->addDocument(doc);

        iw1->close();
        RAMDirectoryPtr rd2 = newLucene<RAMDirectory>();
        IndexWriterPtr iw2 = newLucene<IndexWriter>(rd2, newLucene<SimpleAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);

        doc = newLucene<Document>();
        doc->add(newLucene<Field>(L"field0", L"", Field::STORE_NO, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field1", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
        doc->add(newLucene<Field>(L"field3", L"the fox jumps over the lazy dog", Field::STORE_YES, Field::INDEX_ANALYZED));
        iw2->addDocument(doc);

        iw2->close();

        this->ir1 = IndexReader::open(rd1, true);
        this->ir2 = IndexReader::open(rd2, true);
    }
示例#21
0
// Cross-checks the postings of the "BODY" field of the "test_dlindex" index
// against the term lists held by m_pDocScanner:
//  - for every term and every document in its posting list, the number of
//    occurrences of the term id in the scanner's term list for that file
//    must equal the posting's frequency;
//  - the number of documents visited for a term must equal its document
//    frequency;
//  - the scanner's total term count must equal the reader's term count.
void IndexContentTestCase::testIndexContent_DL()
{
    Index* pIndex;
    IndexReaderPtr pReader;

    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int	docCount = 0;
    int	termCount = 0;
    uint32_t i;
    uint32_t indexTermId;
    string fileName;

    // Open the index under <test data path>/test_dlindex for reading
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_dlindex"));    
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    // indexPtr deletes the Index when this function exits
    auto_ptr<Index> indexPtr(pIndex);
    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();

    pTermIter = pTermReader->termIterator("BODY");

    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();
    // Iterate over all terms of the field
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
		
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
				
        // The term value is read as an int32 term id
        indexTermId = (pTerm->cast<int32_t>())->getValue();
        docCount = 0;
        TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings();
        while(pTermDocFreqs->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc());
            docCount++;
            // Get the file path stored with the document
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());

            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Count how often the current term id occurs in the file's term list
            for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++)
            {
                if(indexTermId == pTermIdList->getValue(i))
                {
                    termCount++;
                }
            }
			
            CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq());

        }//end while nextDoc()
        CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq());
    }
    CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms());
}
    void createIndex(int32_t numHits) {
        int32_t numDocs = 500;

        DirectoryPtr directory = newLucene<SeekCountingDirectory>();
        IndexWriterPtr writer = newLucene<IndexWriter>(directory, newLucene<WhitespaceAnalyzer>(), true, IndexWriter::MaxFieldLengthLIMITED);
        writer->setUseCompoundFile(false);
        writer->setMaxBufferedDocs(10);
        for (int32_t i = 0; i < numDocs; ++i) {
            DocumentPtr doc = newLucene<Document>();
            String content;
            if (i % (numDocs / numHits) == 0) {
                // add a document that matches the query "term1 term2"
                content = term1 + L" " + term2;
            } else if (i % 15 == 0) {
                // add a document that only contains term1
                content = term1 + L" " + term1;
            } else {
                // add a document that contains term2 but not term 1
                content = term3 + L" " + term2;
            }

            doc->add(newLucene<Field>(field, content, Field::STORE_YES, Field::INDEX_ANALYZED));
            writer->addDocument(doc);
        }

        // make sure the index has only a single segment
        writer->optimize();
        writer->close();

        SegmentReaderPtr reader = SegmentReader::getOnlySegmentReader(directory);
        searcher = newLucene<IndexSearcher>(reader);
    }
 // Opens a writer on ramDir1 (passing `create` through to the IndexWriter),
 // adds one document whose stored, analyzed "body" field holds s, and closes
 // the writer again.
 void addDoc(RAMDirectoryPtr ramDir1, const String& s, bool create)
 {
     IndexWriterPtr bodyWriter = newLucene<IndexWriter>(ramDir1, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), create, IndexWriter::MaxFieldLengthLIMITED);

     DocumentPtr bodyDoc = newLucene<Document>();
     bodyDoc->add(newLucene<Field>(L"body", s, Field::STORE_YES, Field::INDEX_ANALYZED));

     bodyWriter->addDocument(bodyDoc);
     bodyWriter->close();
 }
 // Returns a document with a single analyzed, unstored field "f" holding
 // docText; norms are omitted for that field.
 DocumentPtr makeDocument(const String& docText)
 {
     FieldPtr textField = newLucene<Field>(L"f", docText, Field::STORE_NO, Field::INDEX_ANALYZED);
     textField->setOmitNorms(true);

     DocumentPtr result = newLucene<Document>();
     result->add(textField);
     return result;
 }
// Adds numDocs documents to writer, each with a single analyzed, unstored
// "content" field holding "bbb".
static void addDocs2(IndexWriterPtr writer, int32_t numDocs)
{
    for (int32_t remaining = numDocs; remaining > 0; --remaining)
    {
        DocumentPtr contentDoc = newLucene<Document>();
        contentDoc->add(newLucene<Field>(L"content", L"bbb", Field::STORE_NO, Field::INDEX_ANALYZED));
        writer->addDocument(contentDoc);
    }
}
示例#26
0
        //---------------------------------------------------------------------
        // Builds the request document; when the total-servers attribute is
        // present, a "servers" element carrying mTotalFinders is added to the
        // root.
        DocumentPtr FindersGetRequest::encode()
        {
          DocumentPtr doc = IMessageHelper::createDocumentWithRoot(*this);
          ElementPtr rootEl = doc->getFirstChildElement();

          if (!hasAttribute(AttributeType_TotalServers)) {
            return doc;
          }

          rootEl->adoptAsLastChild(IMessageHelper::createElementWithNumber("servers", string(mTotalFinders)));
          return doc;
        }
 // Returns a document with ten un-analyzed, unstored fields "f0".."f9" holding
 // "v0".."v9"; all ten share one boost value taken from a single nextNorm()
 // call.
 DocumentPtr newDoc() {
     DocumentPtr boostedDoc = newLucene<Document>();
     double sharedBoost = nextNorm();

     int32_t fieldNum = 0;
     while (fieldNum < 10) {
         FieldPtr boostedField = newLucene<Field>(L"f" + StringUtils::toString(fieldNum), L"v" + StringUtils::toString(fieldNum), Field::STORE_NO, Field::INDEX_NOT_ANALYZED);
         boostedField->setBoost(sharedBoost);
         boostedDoc->add(boostedField);
         ++fieldNum;
     }

     return boostedDoc;
 }
示例#28
0
// Creates a "text/html" document containing a doctype, an <html> element with
// a <head> and a <body>, and - when title is not empty - a <title> element in
// the head whose text is title. Returns nullptr if anything throws.
Document DOMImplementationImp::createHTMLDocument(const std::u16string& title)
{
    try {
        DocumentPtr impl = std::make_shared<DocumentImp>();
        impl->setContentType(u"text/html");

        DocumentType htmlDoctype = createDocumentType(u"html", u"", u"");    // TODO: set node document
        impl->appendChild(htmlDoctype);

        Element htmlElement = impl->createElement(u"html");
        impl->appendChild(htmlElement);

        Element headElement = impl->createElement(u"head");
        htmlElement.appendChild(headElement);

        // The title element is only created for a non-empty title.
        if (!title.empty()) {
            Element titleElement = impl->createElement(u"title");
            headElement.appendChild(titleElement);
            Text titleText = impl->createTextNode(title);
            titleElement.appendChild(titleText);
        }

        Element bodyElement = impl->createElement(u"body");
        htmlElement.appendChild(bodyElement);

        // TODO: Step 8.
        return impl;
    } catch (...) {
        return nullptr;
    }
}
 virtual void doWork()
 {
     // Update all 100 docs
     for (int32_t i = 0; i < 100; ++i)
     {
         DocumentPtr d = newLucene<Document>();
         d->add(newLucene<Field>(L"id", StringUtils::toString(i), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
         d->add(newLucene<Field>(L"contents", intToEnglish(i), Field::STORE_NO, Field::INDEX_ANALYZED));
         writer->updateDocument(newLucene<Term>(L"id", StringUtils::toString(i)), d);
     }
 }
// Stress test: builds a base index of 100 documents in `directory`, then runs
// two indexer threads (sharing one writer) and two searcher threads against
// it, waits for all of them and checks that none reported a failure.
static void runTest(DirectoryPtr directory)
{
    Collection<TimedThreadPtr> workers(Collection<TimedThreadPtr>::newInstance(4));
    AnalyzerPtr simpleAnalyzer = newLucene<SimpleAnalyzer>();

    IndexWriterPtr sharedWriter = newLucene<MockIndexWriter>(directory, simpleAnalyzer, true, IndexWriter::MaxFieldLengthUNLIMITED);

    // Small buffer and merge factor settings for the shared writer
    sharedWriter->setMaxBufferedDocs(7);
    sharedWriter->setMergeFactor(3);

    // Base index: ids 0..99, with a commit before ids 1, 8, 15, ...
    for (int32_t id = 0; id < 100; ++id)
    {
        DocumentPtr baseDoc = newLucene<Document>();
        baseDoc->add(newLucene<Field>(L"id", StringUtils::toString(id), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
        baseDoc->add(newLucene<Field>(L"contents", intToEnglish(id), Field::STORE_NO, Field::INDEX_ANALYZED));
        if ((id - 1) % 7 == 0)
            sharedWriter->commit();
        sharedWriter->addDocument(baseDoc);
    }
    sharedWriter->commit();

    // Sanity check: all 100 base documents are visible to a fresh reader
    IndexReaderPtr r = IndexReader::open(directory, true);
    BOOST_CHECK_EQUAL(100, r->numDocs());
    r->close();

    // Start the workers one by one: two indexers first, then two searchers
    IndexerThreadPtr indexerThread1 = newLucene<IndexerThread>(sharedWriter);
    workers[0] = indexerThread1;
    indexerThread1->start();

    IndexerThreadPtr indexerThread2 = newLucene<IndexerThread>(sharedWriter);
    workers[1] = indexerThread2;
    indexerThread2->start();

    SearcherThreadPtr searcherThread1 = newLucene<SearcherThread>(directory);
    workers[2] = searcherThread1;
    searcherThread1->start();

    SearcherThreadPtr searcherThread2 = newLucene<SearcherThread>(directory);
    workers[3] = searcherThread2;
    searcherThread2->start();

    // Wait for every worker before closing the shared writer
    indexerThread1->join();
    indexerThread2->join();
    searcherThread1->join();
    searcherThread2->join();

    sharedWriter->close();

    BOOST_CHECK(!indexerThread1->failed); // hit unexpected exception in indexer1
    BOOST_CHECK(!indexerThread2->failed); // hit unexpected exception in indexer2
    BOOST_CHECK(!searcherThread1->failed); // hit unexpected exception in search1
    BOOST_CHECK(!searcherThread2->failed); // hit unexpected exception in search2
}