/**
 * Exercises Query::extractTerms() on rewritten wildcard queries.
 *
 * Two patterns are run against field "data" of the index returned by
 * setUpIndex():
 *   - "aaaa?" : asserts exactly 3 extracted terms, each one of
 *               "aaaaa", "aaaab" or "aaaac";
 *   - "aaa*"  : asserts exactly 5 extracted terms, each starting with "aaa".
 *
 * @param tc CuTest context consumed by the assert macros.
 */
void testExtractFromWildcardQuery( CuTest * tc )
{
    Directory *   directory = setUpIndex();
    IndexReader * reader    = IndexReader::open( directory );
    TermSet       extracted;

    // Case 1: single-character wildcard.
    {
        Term *          pattern   = _CLNEW Term( _T("data"), _T("aaaa?") );
        WildcardQuery * query     = _CLNEW WildcardQuery( pattern );
        Query *         rewritten = query->rewrite( reader );

        rewritten->extractTerms( &extracted );
        _CLLDECDELETE( pattern );

        assertEqualsMsg( _T( "wrong number of terms" ), 3, extracted.size() );

        TermSet::iterator cursor = extracted.begin();
        while( cursor != extracted.end() )
        {
            Term * found = *cursor;
            const bool isExpected =
                   0 == _tcscmp( _T( "aaaaa" ), found->text())
                || 0 == _tcscmp( _T( "aaaab" ), found->text())
                || 0 == _tcscmp( _T( "aaaac" ), found->text());
            if( !isExpected )
            {
                assertTrueMsg( _T( "wrong term" ), false );
            }
            ++cursor;
        }

        clearTermSet( extracted );
        // rewrite() may hand back the query itself; only delete a distinct object.
        if( rewritten != query )
        {
            _CLDELETE( rewritten );
        }
        _CLDELETE( query );
    }

    // Case 2: trailing multi-character wildcard.
    {
        Term *          pattern   = _CLNEW Term( _T("data"), _T("aaa*") );
        WildcardQuery * query     = _CLNEW WildcardQuery( pattern );
        Query *         rewritten = query->rewrite( reader );

        rewritten->extractTerms( &extracted );
        _CLLDECDELETE( pattern );

        assertEqualsMsg( _T( "wrong number of terms" ), 5, extracted.size() );

        TermSet::iterator cursor = extracted.begin();
        while( cursor != extracted.end() )
        {
            Term * found = *cursor;
            const bool hasPrefix = ( 0 == _tcsncmp( _T( "aaa" ), found->text(), 3 ));
            assertTrueMsg( _T( "wrong term" ), hasPrefix );
            ++cursor;
        }

        clearTermSet( extracted );
        if( rewritten != query )
        {
            _CLDELETE( rewritten );
        }
        _CLDELETE( query );
    }

    reader->close();
    _CLDELETE( reader );

    closeIndex( directory );
    directory = NULL;
}
/**
 * Hands every Term pointer stored in the set to _CLLDECDELETE and then
 * empties the set.
 *
 * @param termSet Set to release and clear; it is empty on return.
 */
void clearTermSet( TermSet& termSet )
{
    TermSet::iterator cursor = termSet.begin();
    while( cursor != termSet.end() )
    {
        Term * held = *cursor;
        // presumably drops the reference taken when the term was extracted -- confirm against the macro
        _CLLDECDELETE( held );
        ++cursor;
    }

    termSet.clear();
}
bool TermSet::is_subset(const TermSet& y) const { TermSet::iterator i, e, j; for(i=y.begin(), e=y.end(); i!=e; i++) { if ( (j=find(*i))==end() ) return false; // term in y isn't here if ( !(*j).is_subset(*i) ) return false; // y term isn't a subset of this term } return true; }
/**
 * Exercises Query::extractTerms() on a rewritten fuzzy query.
 *
 * A FuzzyQuery for "aaaab" in field "data" (constructed with 0.7f) is
 * rewritten against the index from setUpIndex(). The test asserts that
 * exactly 4 terms are extracted and that each one is "aaaaa", "aaaab",
 * "aaabb" or "aaaac".
 *
 * @param tc CuTest context consumed by the assert macros.
 */
void testExtractFromFuzzyQuery( CuTest * tc )
{
    Directory *   directory = setUpIndex();
    IndexReader * reader    = IndexReader::open( directory );
    TermSet       extracted;

    Term *       seed      = _CLNEW Term( _T("data"), _T("aaaab") );
    FuzzyQuery * query     = _CLNEW FuzzyQuery( seed, 0.7f );
    Query *      rewritten = query->rewrite( reader );

    rewritten->extractTerms( &extracted );
    _CLLDECDELETE( seed );

    assertEqualsMsg( _T( "wrong number of terms" ), 4, extracted.size() );

    TermSet::iterator cursor = extracted.begin();
    while( cursor != extracted.end() )
    {
        Term * found = *cursor;
        const bool isExpected =
               0 == _tcscmp( _T( "aaaaa" ), found->text())
            || 0 == _tcscmp( _T( "aaaab" ), found->text())
            || 0 == _tcscmp( _T( "aaabb" ), found->text())
            || 0 == _tcscmp( _T( "aaaac" ), found->text());
        if( !isExpected )
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
        ++cursor;
    }

    clearTermSet( extracted );
    // rewrite() may hand back the query itself; only delete a distinct object.
    if( rewritten != query )
    {
        _CLDELETE( rewritten );
    }
    _CLDELETE( query );

    reader->close();
    _CLDELETE( reader );

    closeIndex( directory );
    directory = NULL;
}
void QueryTermExtractor::getTerms(const Query * query, WeightedTermList * terms, bool prohibited, const TCHAR* fieldName) { if (query->instanceOf( BooleanQuery::getClassName() )) { getTermsFromBooleanQuery((BooleanQuery *) query, terms, prohibited, fieldName); } // FilteredQuery not implemented yet // else if (query->instanceOf( FilteredQuery::getClassName() )) // getTermsFromFilteredQuery((FilteredQuery *) query, terms); else { TermSet nonWeightedTerms; query->extractTerms(&nonWeightedTerms); for (TermSet::iterator iter = nonWeightedTerms.begin(); iter != nonWeightedTerms.end(); iter++) { Term * term = (Term *)(*iter); if ( fieldName == NULL || term->field() == fieldName ) terms->insert(_CLNEW WeightedTerm(query->getBoost(), term->text())); _CLLDECDELETE( term ); } } }
/**
 * Exercises Query::extractTerms() on a rewritten BooleanQuery.
 *
 * Three SHOULD TermQuery clauses on field "data" are added: "aaaab",
 * "aaabb" and a second "aaabb". The test asserts that exactly 2 terms are
 * extracted and that each compares equal to the first or the second term.
 *
 * @param tc CuTest context consumed by the assert macros.
 */
void testExtractFromBooleanQuery( CuTest * tc )
{
    Directory *   directory = setUpIndex();
    IndexReader * reader    = IndexReader::open( directory );
    TermSet       extracted;

    Term * first     = _CLNEW Term( _T("data"), _T("aaaab") );
    Term * second    = _CLNEW Term( _T("data"), _T("aaabb") );
    Term * duplicate = _CLNEW Term( _T("data"), _T("aaabb") );  // same text as 'second'

    BooleanQuery * boolQuery = _CLNEW BooleanQuery();
    boolQuery->add( _CLNEW TermQuery( first ),     true, BooleanClause::SHOULD );
    boolQuery->add( _CLNEW TermQuery( second ),    true, BooleanClause::SHOULD );
    boolQuery->add( _CLNEW TermQuery( duplicate ), true, BooleanClause::SHOULD );

    Query * rewritten = boolQuery->rewrite( reader );
    rewritten->extractTerms( &extracted );

    assertEqualsMsg( _T( "wrong number of terms" ), 2, extracted.size() );

    TermSet::iterator cursor = extracted.begin();
    while( cursor != extracted.end() )
    {
        Term * found = *cursor;
        const bool isExpected = ( 0 == first->compareTo( found ) || 0 == second->compareTo( found ));
        assertTrueMsg( _T( "wrong term" ), isExpected );
        ++cursor;
    }

    clearTermSet( extracted );
    _CLLDECDELETE( first );
    _CLLDECDELETE( second );
    _CLLDECDELETE( duplicate );

    // rewrite() may hand back the query itself; only delete a distinct object.
    if( rewritten != boolQuery )
    {
        _CLDELETE( rewritten );
    }
    _CLDELETE( boolQuery );

    reader->close();
    _CLDELETE( reader );

    closeIndex( directory );
    directory = NULL;
}
/**
 * Orders two term sets lexicographically over their elements in iteration
 * order, using the elements' own less-than comparison.
 *
 * @param y Right-hand operand.
 * @return true if this set compares lexicographically less than @p y.
 */
bool TermSet::operator<(const TermSet& y) const
{
    const TermSet& self = *this;
    return std::lexicographical_compare(self.begin(), self.end(),
                                        y.begin(), y.end());
}