Example #1
0
/// Formats a whole document and returns the resulting text.
///
/// The internal buffer is emptied, the document is handed to writeContainer(),
/// and a copy of the buffer is returned.
///
/// @param document  Document to format; asserted to be non-null.
/// @return The contents of m_buffer after writeContainer() has run.
tstring Writer::formatDocument(DocumentPtr document)
{
	ASSERT(document.get() != nullptr);

	// Drop anything left over from a previous call.
	m_buffer.clear();

	auto& root = *document.get();
	writeContainer(root);

	// m_depth is expected to be back at zero here
	// (presumably it tracks container nesting - confirm against writeContainer).
	ASSERT(m_depth == 0);

	return m_buffer;
}
 /// Queries the inclusive range [lower, INT_MAX] on the field for the given precision step and
 /// checks the number of hits as well as the field value of the first and the last hit.
 /// @param precisionStep  precision step, also used as the suffix of the field name.
 void testRightOpenRange(int32_t precisionStep) {
     String fieldName = L"field" + StringUtils::toString(precisionStep);
     int32_t skipped = 3000;
     int32_t lowerBound = (skipped - 1) * distance + (distance / 3) + startOffset;

     NumericRangeQueryPtr query = NumericRangeQuery::newIntRange(fieldName, precisionStep, lowerBound, INT_MAX, true, true);
     Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), noDocs, Sort::INDEXORDER())->scoreDocs;
     EXPECT_TRUE(hits);
     EXPECT_EQ(noDocs - skipped, hits.size());

     // First hit in index order.
     DocumentPtr firstDoc = searcher->doc(hits[0]->doc);
     EXPECT_EQ(StringUtils::toString(skipped * distance + startOffset), firstDoc->get(fieldName));

     // Last hit in index order.
     DocumentPtr lastDoc = searcher->doc(hits[hits.size() - 1]->doc);
     EXPECT_EQ(StringUtils::toString((noDocs - 1) * distance + startOffset), lastDoc->get(fieldName));
 }
    /// Runs the same inclusive range three ways - the query with constant score filter rewrite, the
    /// query with constant score boolean rewrite, and a NumericRangeFilter applied to a
    /// MatchAllDocsQuery - and checks that each returns the expected hits and that the number of
    /// terms visited matches the previous run. The other tests only use the constant score mode.
    /// @param precisionStep  precision step, also used as the suffix of the field name.
    void testRange(int32_t precisionStep) {
        String fieldName = L"field" + StringUtils::toString(precisionStep);
        int32_t expectedHits = 3000;
        int32_t lowerBound = (distance * 3 / 2) + startOffset;
        int32_t upperBound = lowerBound + expectedHits * distance + (distance / 3);

        NumericRangeQueryPtr query = NumericRangeQuery::newIntRange(fieldName, precisionStep, lowerBound, upperBound, true, true);
        NumericRangeFilterPtr filter = NumericRangeFilter::newIntRange(fieldName, precisionStep, lowerBound, upperBound, true, true);

        int32_t previousTermCount = 0;
        for (int32_t mode = 0; mode < 3; ++mode) {
            // Reset the term counters so each run is measured on its own.
            query->clearTotalNumberOfTerms();
            filter->clearTotalNumberOfTerms();

            TopDocsPtr results;
            int32_t termCount;
            String label;
            if (mode == 0) {
                label = L" (constant score filter rewrite)";
                query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE());
                results = searcher->search(query, FilterPtr(), noDocs, Sort::INDEXORDER());
                termCount = query->getTotalNumberOfTerms();
            } else if (mode == 1) {
                label = L" (constant score boolean rewrite)";
                query->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE());
                results = searcher->search(query, FilterPtr(), noDocs, Sort::INDEXORDER());
                termCount = query->getTotalNumberOfTerms();
            } else {
                label = L" (filter)";
                results = searcher->search(newLucene<MatchAllDocsQuery>(), filter, noDocs, Sort::INDEXORDER());
                termCount = filter->getTotalNumberOfTerms();
            }
            // std::cout << "Found " << termCount << " distinct terms in range for field '" << fieldName << "'" << label << ".";

            Collection<ScoreDocPtr> hits = results->scoreDocs;
            EXPECT_TRUE(hits);
            EXPECT_EQ(expectedHits, hits.size());

            DocumentPtr firstDoc = searcher->doc(hits[0]->doc);
            EXPECT_EQ(StringUtils::toString(2 * distance + startOffset), firstDoc->get(fieldName));

            DocumentPtr lastDoc = searcher->doc(hits[hits.size() - 1]->doc);
            EXPECT_EQ(StringUtils::toString((1 + expectedHits) * distance + startOffset), lastDoc->get(fieldName));

            // From the second run on, the term count must match the previous run.
            if (mode > 0) {
                EXPECT_EQ(previousTermCount, termCount);
            }
            previousTermCount = termCount;
        }
    }
 /// checks if a query yields the same result when executed on a single IndexSearcher containing all 
 /// documents and on MultiSearcher aggregating sub-searchers
 /// @param queryStr  the query to check.
 void checkQuery(const String& queryStr)
 {
     QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, FIELD_NAME, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT));
     QueryPtr query = queryParser->parse(queryStr);
     Collection<ScoreDocPtr> multiSearcherHits = multiSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
     Collection<ScoreDocPtr> singleSearcherHits = singleSearcher->search(query, FilterPtr(), 1000)->scoreDocs;
     BOOST_CHECK_EQUAL(multiSearcherHits.size(), singleSearcherHits.size());
     for (int32_t i = 0; i < multiSearcherHits.size(); ++i)
     {
         DocumentPtr docMulti = multiSearcher->doc(multiSearcherHits[i]->doc);
         DocumentPtr docSingle = singleSearcher->doc(singleSearcherHits[i]->doc);
         BOOST_CHECK_CLOSE_FRACTION(multiSearcherHits[i]->score, singleSearcherHits[i]->score, 0.001);
         BOOST_CHECK_EQUAL(docMulti->get(FIELD_NAME), docSingle->get(FIELD_NAME));
     }
 }
/// Searches for "Document" in TEXT_FIELD, sorted on DATE_TIME_FIELD as a string in reverse order,
/// and checks that the text of the hits comes back as Document 5, 4, 3, 2, 1.
TEST_F(DateSortTest, testReverseDateSort) {
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);

    SortPtr sort = newLucene<Sort>(newLucene<SortField>(DATE_TIME_FIELD, SortField::STRING, true));

    QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, newLucene<WhitespaceAnalyzer>());
    QueryPtr query = queryParser->parse(L"Document");

    // Execute the search and process the search results.
    // The result list grows with the hits, so an unexpected number of hits makes the final
    // comparison fail instead of writing past the end of a fixed-size collection.
    Collection<String> actualOrder = Collection<String>::newInstance();
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000, sort)->scoreDocs;
    for (int32_t i = 0; i < hits.size(); ++i) {
        DocumentPtr document = searcher->doc(hits[i]->doc);
        actualOrder.add(document->get(TEXT_FIELD));
    }
    searcher->close();

    // Set up the expected order (ie. Document 5, 4, 3, 2, 1).
    Collection<String> expectedOrder = Collection<String>::newInstance(5);
    expectedOrder[0] = L"Document 5";
    expectedOrder[1] = L"Document 4";
    expectedOrder[2] = L"Document 3";
    expectedOrder[3] = L"Document 2";
    expectedOrder[4] = L"Document 1";

    EXPECT_TRUE(expectedOrder.equals(actualOrder));
}
Example #6
0
/// Runs a fuzzy search for the query's full-text string against the "album" field.
///
/// Holds m_mutex for the duration of the call and opens the index reader and searcher on
/// first use. If a LuceneException is caught, the error is logged and wipeIndex() is
/// scheduled through a zero-delay single-shot timer.
///
/// @param query  Query to search for; asserted to be a full-text query.
/// @return Map from the hit's "albumid" field (parsed as int) to its score, for hits scoring
///         above 0.30. Empty if the index does not exist; may be partially filled if an
///         error occurs while collecting results.
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( m_luceneDir ) )
            {
                tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
        }

        const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

        FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
        m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // Walk the documents that were actually collected. getTotalHits() counts every match,
        // which can exceed the collector's capacity and therefore the size of 'hits'.
        const int hitCount = hits.size();
        for ( int i = 0; i < hitCount; i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            const float score = hits[i]->score;
            const int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << error.what();

        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }

    return resultsmap;
}
Example #7
0
/// Runs a fuzzy search for the query's full-text string against the "album" field.
///
/// Returns immediately with an empty map when the reader or searcher has not been set up.
/// A caught LuceneException is logged and the results gathered so far are returned.
///
/// @param query  Query to search for; asserted to be a full-text query.
/// @return Map from the hit's "albumid" field (parsed as int) to its score, for hits scoring
///         above 0.30.
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

//    QMutexLocker lock( &m_mutex );
    QMap< int, float > resultsmap;
    if ( !m_luceneReader || !m_luceneSearcher )
        return resultsmap;

    try
    {
        const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

        FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
        m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // Walk the documents that were actually collected. getTotalHits() counts every match,
        // which can exceed the collector's capacity and therefore the size of 'hits'.
        const int hitCount = hits.size();
        for ( int i = 0; i < hitCount; i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            const float score = hits[i]->score;
            const int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << QString::fromWCharArray( error.getError().c_str() );
    }

    return resultsmap;
}
Example #8
0
/// Runs a fuzzy search for the query against the index and returns matching track ids.
///
/// Holds m_mutex for the duration of the call and opens the index reader and searcher on
/// first use. A full-text query is matched against the "track", "artist" and "fulltext"
/// fields, any of which may match (SHOULD). Otherwise the query track's track and artist
/// names must both match (MUST). If a LuceneException is caught, the error is logged and
/// wipeIndex() is scheduled through a zero-delay single-shot timer.
///
/// @param query  Query to search for.
/// @return Map from the hit's "trackid" field (parsed as int) to its score, for at most 50
///         hits with a score above zero. Empty if the index does not exist.
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( m_luceneDir ) )
            {
                tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
        }

        // Maximum number of hits collected.
        const int maxHits = 50;
        // Hits must score strictly above this to be reported (same for both query kinds).
        const float minScore = 0.00;

        BooleanQueryPtr qry = newLucene<BooleanQuery>();

        if ( query->isFullTextQuery() )
        {
            const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry3 = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry3 ), BooleanClause::SHOULD );
        }
        else
        {
            const QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
            const QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
            //QString album = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->album() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::MUST );
        }

        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( maxHits, false );
        m_luceneSearcher->search( qry, collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // 'hits' holds only the collected documents, so its size is the loop bound;
        // the collector caps it at maxHits.
        const int hitCount = hits.size();
        for ( int i = 0; i < hitCount; i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            const float score = hits[i]->score;
            const int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << error.what() << query->toString();

        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }

    return resultsmap;
}
Example #9
0
/// Runs a fuzzy search for the query against the index and returns matching track ids.
///
/// Returns immediately with an empty map when the reader or searcher has not been set up.
/// A full-text query is matched against the "track", "artist" and "fulltext" fields, any of
/// which may match (SHOULD). Otherwise the query track's track and artist names must both
/// match (MUST), using fuzzy queries built with the extra arguments 0.5 and 3. A caught
/// LuceneException is logged and the results gathered so far are returned.
///
/// @param query  Query to search for.
/// @return Map from the hit's "trackid" field (parsed as int) to its score, for at most 20 hits.
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
//    QMutexLocker lock( &m_mutex );
    QMap< int, float > resultsmap;
    if ( !m_luceneReader || !m_luceneSearcher )
        return resultsmap;

    try
    {
        // Maximum number of hits collected.
        const int maxHits = 20;

        BooleanQueryPtr qry = newLucene<BooleanQuery>();

        if ( query->isFullTextQuery() )
        {
            const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry3 = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry3 ), BooleanClause::SHOULD );
        }
        else
        {
            const QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
            const QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
            //QString album = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->album() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ), 0.5, 3 );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ), 0.5, 3 );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::MUST );
        }

        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( maxHits, true );
        m_luceneSearcher->search( qry, collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // 'hits' holds only the collected documents, so its size is the loop bound;
        // the collector caps it at maxHits.
        const int hitCount = hits.size();
        for ( int i = 0; i < hitCount; i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            const float score = hits[i]->score;
            const int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();

            // Every collected hit is reported; no minimum score is applied.
            resultsmap.insert( id, score );
//            tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << QString::fromWCharArray( error.getError().c_str() ) << query->toString();
    }

    return resultsmap;
}
Example #10
0
    const RawDocumentPtr& pOrgRawDoc = pThis->m_pDocSrc->getRawDocument();
    *ppRawDoc = ComRawDocument::wrap(const_cast<RawDocument*>(pOrgRawDoc.get()));
    return FX_S_OK;
}

// Acquires a document from the wrapped document source for the named processor and hands
// it back through ppDocument as a wrapped IDocument.
//
// processorName  [in]  name of the processor, copied into a string before use.
// ppDocument     [out] receives the result of ComDocument::wrap(); written unconditionally.
// Always returns FX_S_OK.
FX_STDMETHODIMP ComDocumentSource::XDocumentSource::acquireDocument(
        /* [in] */ FX_NS(com)::FX_BSTR processorName,
        /* [out] */ IDocument** ppDocument)
{
    // Copy the incoming name into a string the document source accepts.
    ComStr nameWrapper(processorName);
    string procName;
    nameWrapper.copyTo(procName);

    // NOTE(review): the prologue macro presumably provides pThis - confirm its definition.
    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);

    DocumentPtr acquired = pThis->m_pDocSrc->acquireDocument(procName);
    *ppDocument = ComDocument::wrap(acquired.get());

    return FX_S_OK;
}

// Takes the last document from the wrapped document source via stealLastDocument() and
// hands it back through ppDocument as a wrapped IDocument.
//
// ppDocument  [out] receives the result of ComDocument::wrap(); written unconditionally.
// Always returns FX_S_OK.
FX_STDMETHODIMP ComDocumentSource::XDocumentSource::stealLastDocument(/* [out] */ IDocument** ppDocument)
{
    // NOTE(review): the prologue macro presumably provides pThis - confirm its definition.
    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);

    DocumentPtr stolen = pThis->m_pDocSrc->stealLastDocument();
    *ppDocument = ComDocument::wrap(stolen.get());

    return FX_S_OK;
}

FX_STDMETHODIMP ComDocumentSource::XDocumentSource::toBeContinued(/* [out] */ FX_NS(com)::FX_BOOL* bContinue)
{
    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);
    *bContinue = (pThis->m_pDocSrc->toBeContinued() == true) ? 1 : 0;