/// Formats @p document into the writer's buffer and returns the buffer's contents.
///
/// The buffer is cleared before anything is written, so output left behind by an
/// earlier call is discarded.
///
/// @param document Document to format; asserted to be non-null.
/// @return A copy of m_buffer after the document has been passed to writeContainer().
tstring Writer::formatDocument(DocumentPtr document) {
    const auto root = document.get();
    ASSERT(root != nullptr);

    m_buffer.clear();
    writeContainer(*root);

    // The nesting depth is expected to be back at zero once writing has finished.
    ASSERT(m_depth == 0);
    return m_buffer;
}
void testRightOpenRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (count - 1) * distance + (distance / 3) + startOffset; NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, lower, INT_MAX, true, true); TopDocsPtr topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); EXPECT_EQ(noDocs - count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); EXPECT_EQ(StringUtils::toString(count * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); EXPECT_EQ(StringUtils::toString((noDocs - 1) * distance + startOffset), doc->get(field)); }
/// test for both constant score and boolean query, the other tests only use the constant score mode void testRange(int32_t precisionStep) { String field = L"field" + StringUtils::toString(precisionStep); int32_t count = 3000; int32_t lower = (distance * 3 / 2) + startOffset; int32_t upper = lower + count * distance + (distance / 3); NumericRangeQueryPtr q = NumericRangeQuery::newIntRange(field, precisionStep, lower, upper, true, true); NumericRangeFilterPtr f = NumericRangeFilter::newIntRange(field, precisionStep, lower, upper, true, true); int32_t lastTerms = 0; for (uint8_t i = 0; i < 3; ++i) { TopDocsPtr topDocs; int32_t terms; String type; q->clearTotalNumberOfTerms(); f->clearTotalNumberOfTerms(); switch (i) { case 0: type = L" (constant score filter rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 1: type = L" (constant score boolean rewrite)"; q->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); topDocs = searcher->search(q, FilterPtr(), noDocs, Sort::INDEXORDER()); terms = q->getTotalNumberOfTerms(); break; case 2: type = L" (filter)"; topDocs = searcher->search(newLucene<MatchAllDocsQuery>(), f, noDocs, Sort::INDEXORDER()); terms = f->getTotalNumberOfTerms(); break; default: return; } // std::cout << "Found " << terms << " distinct terms in range for field '" << field << "'" << type << "."; Collection<ScoreDocPtr> sd = topDocs->scoreDocs; EXPECT_TRUE(sd); EXPECT_EQ(count, sd.size()); DocumentPtr doc = searcher->doc(sd[0]->doc); EXPECT_EQ(StringUtils::toString(2 * distance + startOffset), doc->get(field)); doc = searcher->doc(sd[sd.size() - 1]->doc); EXPECT_EQ(StringUtils::toString((1 + count) * distance + startOffset), doc->get(field)); if (i > 0) { EXPECT_EQ(lastTerms, terms); } lastTerms = terms; } }
/// checks if a query yields the same result when executed on a single IndexSearcher containing all /// documents and on MultiSearcher aggregating sub-searchers /// @param queryStr the query to check. void checkQuery(const String& queryStr) { QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, FIELD_NAME, newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT)); QueryPtr query = queryParser->parse(queryStr); Collection<ScoreDocPtr> multiSearcherHits = multiSearcher->search(query, FilterPtr(), 1000)->scoreDocs; Collection<ScoreDocPtr> singleSearcherHits = singleSearcher->search(query, FilterPtr(), 1000)->scoreDocs; BOOST_CHECK_EQUAL(multiSearcherHits.size(), singleSearcherHits.size()); for (int32_t i = 0; i < multiSearcherHits.size(); ++i) { DocumentPtr docMulti = multiSearcher->doc(multiSearcherHits[i]->doc); DocumentPtr docSingle = singleSearcher->doc(singleSearcherHits[i]->doc); BOOST_CHECK_CLOSE_FRACTION(multiSearcherHits[i]->score, singleSearcherHits[i]->score, 0.001); BOOST_CHECK_EQUAL(docMulti->get(FIELD_NAME), docSingle->get(FIELD_NAME)); } }
/// Searches for "Document" in TEXT_FIELD, sorted on DATE_TIME_FIELD as a string with
/// the SortField reverse flag set, and checks that the TEXT_FIELD values of the hits
/// come back as Document 5, 4, 3, 2, 1.
TEST_F(DateSortTest, testReverseDateSort) {
    IndexSearcherPtr searcher = newLucene<IndexSearcher>(directory, true);

    SortPtr sort = newLucene<Sort>(newLucene<SortField>(DATE_TIME_FIELD, SortField::STRING, true));

    QueryParserPtr queryParser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, TEXT_FIELD, newLucene<WhitespaceAnalyzer>());
    QueryPtr query = queryParser->parse(L"Document");

    // Execute the search and process the search results.
    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000, sort)->scoreDocs;

    // Size the result list from the hits actually returned rather than assuming five:
    // an unexpected extra hit would otherwise be written past the end of the
    // collection. A wrong hit count still fails the comparison below, because the
    // two collections then differ in size.
    Collection<String> actualOrder = Collection<String>::newInstance(hits.size());
    for (int32_t i = 0; i < hits.size(); ++i) {
        DocumentPtr document = searcher->doc(hits[i]->doc);
        actualOrder[i] = document->get(TEXT_FIELD);
    }
    searcher->close();

    // Set up the expected order (ie. Document 5, 4, 3, 2, 1).
    Collection<String> expectedOrder = Collection<String>::newInstance(5);
    expectedOrder[0] = L"Document 5";
    expectedOrder[1] = L"Document 4";
    expectedOrder[2] = L"Document 3";
    expectedOrder[3] = L"Document 2";
    expectedOrder[4] = L"Document 1";

    EXPECT_TRUE(expectedOrder.equals(actualOrder));
}
/// Fuzzy-matches the query's full text against the "album" field of the Lucene index.
///
/// Runs under m_mutex. The index reader and searcher are created on first use; if no
/// index exists in m_luceneDir yet, an empty map is returned.
///
/// @param query Query to search for; asserted to be a full-text query.
/// @return Map from album id (the hit's "albumid" field) to score, containing the hits
///         whose score exceeds 0.30. If a LuceneException is thrown, the error is
///         logged, wipeIndex() is scheduled through a zero-delay single-shot timer,
///         and whatever had been collected up to that point is returned.
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( m_luceneDir ) )
            {
                tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
        }

        QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

        FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
        m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );

        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // Iterate over the hits that were actually returned. The collector's total hit
        // count covers every match, including those beyond the collector's capacity,
        // so using it as the bound could index past the end of the collection.
        for ( int i = 0; i < hits.size(); i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            float score = hits[i]->score;
            int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << error.what();

        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }

    return resultsmap;
}
/// Fuzzy-matches the query's full text against the "album" field of the Lucene index.
///
/// Returns an empty map right away when the reader or the searcher has not been set
/// up. No lock is taken in this function.
///
/// @param query Query to search for; asserted to be a full-text query.
/// @return Map from album id (the hit's "albumid" field) to score, containing the hits
///         whose score exceeds 0.30. If a LuceneException is thrown, it is logged and
///         whatever had been collected up to that point is returned.
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

//    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    if ( !m_luceneReader || !m_luceneSearcher )
        return resultsmap;

    try
    {
        const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

        FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
        m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );

        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // Iterate over the hits that were actually returned. The collector's total hit
        // count covers every match, including those beyond the collector's capacity,
        // so using it as the bound could index past the end of the collection.
        for ( int i = 0; i < hits.size(); i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            float score = hits[i]->score;
            int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << QString::fromWCharArray( error.getError().c_str() );
    }

    return resultsmap;
}
/// Searches the Lucene index for tracks matching @p query.
///
/// Runs under m_mutex. The index reader and searcher are created on first use; if no
/// index exists in m_luceneDir yet, an empty map is returned.
///
/// A full-text query is fuzzy-matched against the "track", "artist" and "fulltext"
/// fields, any of which may match (SHOULD). Otherwise the query track's track and
/// artist names are fuzzy-matched against "track" and "artist", and both must match
/// (MUST).
///
/// @param query Query to search for.
/// @return Map from track id (the hit's "trackid" field) to score for up to 50 hits
///         with a score above 0. If a LuceneException is thrown, the error is logged,
///         wipeIndex() is scheduled through a zero-delay single-shot timer, and
///         whatever had been collected up to that point is returned.
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( m_luceneDir ) )
            {
                tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
        }

        // Upper bound on the number of hits the collector keeps.
        const int maxHits = 50;
        // Hits must score strictly above this value; both query kinds use the same one.
        const float minScore = 0.00;

        BooleanQueryPtr qry = newLucene<BooleanQuery>();
        if ( query->isFullTextQuery() )
        {
            QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry3 = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry3 ), BooleanClause::SHOULD );
        }
        else
        {
            QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
            QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
            //QString album = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->album() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::MUST );
        }

        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( maxHits, false );
        m_luceneSearcher->search( qry, collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // The collector never hands back more than maxHits entries, so the size of the
        // returned collection is the only bound needed.
        for ( int i = 0; i < hits.size(); i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            float score = hits[i]->score;
            int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << error.what() << query->toString();

        QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
    }

    return resultsmap;
}
/// Searches the Lucene index for tracks matching @p query.
///
/// Returns an empty map right away when the reader or the searcher has not been set
/// up. No lock is taken in this function.
///
/// A full-text query is fuzzy-matched against the "track", "artist" and "fulltext"
/// fields, any of which may match (SHOULD). Otherwise the query track's track and
/// artist names are fuzzy-matched against "track" and "artist" with a minimum
/// similarity of 0.5 and a prefix length of 3, and both must match (MUST).
///
/// @param query Query to search for.
/// @return Map from track id (the hit's "trackid" field) to score for up to 20 hits.
///         If a LuceneException is thrown, it is logged together with the query and
///         whatever had been collected up to that point is returned.
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
//    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    if ( !m_luceneReader || !m_luceneSearcher )
        return resultsmap;

    try
    {
        // Upper bound on the number of hits the collector keeps.
        const int maxHits = 20;

        BooleanQueryPtr qry = newLucene<BooleanQuery>();
        if ( query->isFullTextQuery() )
        {
            const QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::SHOULD );

            FuzzyQueryPtr fqry3 = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry3 ), BooleanClause::SHOULD );
        }
        else
        {
            const QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
            const QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
            //QString album = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->album() );

            FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ), 0.5, 3 );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );

            FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ), 0.5, 3 );
            qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::MUST );
        }

        TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( maxHits, true );
        m_luceneSearcher->search( qry, collector );
        Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;

        // The collector never hands back more than maxHits entries, so the size of the
        // returned collection is the only bound needed.
        for ( int i = 0; i < hits.size(); i++ )
        {
            DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
            const float score = hits[i]->score;
            const int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();

            resultsmap.insert( id, score );
//            tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
        }
    }
    catch( LuceneException& error )
    {
        tDebug() << "Caught Lucene error:" << QString::fromWCharArray( error.getError().c_str() ) << query->toString();
    }

    return resultsmap;
}
const RawDocumentPtr& pOrgRawDoc = pThis->m_pDocSrc->getRawDocument();
*ppRawDoc = ComRawDocument::wrap(const_cast<RawDocument*>(pOrgRawDoc.get()));
return FX_S_OK;
}

/// COM-facing wrapper around the document source's acquireDocument().
///
/// Copies @p processorName into a string, forwards it to
/// m_pDocSrc->acquireDocument() and stores the ComDocument::wrap() result for the
/// returned document in @p ppDocument.
///
/// @param processorName [in]  name passed on to the underlying document source.
/// @param ppDocument    [out] receives the wrapped document; written without a null check.
/// @return FX_S_OK.
///
/// NOTE(review): pDoc is a local DocumentPtr and only its raw pointer is handed to
/// ComDocument::wrap() — presumably wrap() or the document source keeps the document
/// alive after this function returns; confirm.
FX_STDMETHODIMP ComDocumentSource::XDocumentSource::acquireDocument( /* [in] */ FX_NS(com)::FX_BSTR processorName, /* [out] */ IDocument** ppDocument)
{
    ComStr comStrName(processorName);
    string sProcName;
    comStrName.copyTo(sProcName);

    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);

    DocumentPtr pDoc = pThis->m_pDocSrc->acquireDocument(sProcName);
    *ppDocument = ComDocument::wrap(pDoc.get());
    return FX_S_OK;
}

/// COM-facing wrapper around the document source's stealLastDocument().
///
/// Stores the ComDocument::wrap() result for the document returned by
/// m_pDocSrc->stealLastDocument() in @p ppDocument.
///
/// @param ppDocument [out] receives the wrapped document; written without a null check.
/// @return FX_S_OK.
///
/// NOTE(review): as in acquireDocument(), only the raw pointer of the local
/// DocumentPtr is wrapped — confirm who owns the document afterwards.
FX_STDMETHODIMP ComDocumentSource::XDocumentSource::stealLastDocument(/* [out] */ IDocument** ppDocument)
{
    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);

    DocumentPtr pDoc = pThis->m_pDocSrc->stealLastDocument();
    *ppDocument = ComDocument::wrap(pDoc.get());
    return FX_S_OK;
}

FX_STDMETHODIMP ComDocumentSource::XDocumentSource::toBeContinued(/* [out] */ FX_NS(com)::FX_BOOL* bContinue)
{
    FX_METHOD_PROLOGUE(ComDocumentSource, DocumentSource);

    // Report the document source's toBeContinued() flag as 1 or 0.
    *bContinue = (pThis->m_pDocSrc->toBeContinued() == true) ? 1 : 0;