Beispiel #1
0
void testExtractFromWildcardQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;
    WildcardQuery * wildcard;
    Term *          t1;
    Query *         rewrite;


    t1 = _CLNEW Term( _T("data"), _T("aaaa?") );
    wildcard = _CLNEW WildcardQuery( t1 );
    rewrite = wildcard->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );

    assertEqualsMsg( _T( "wrong number of terms" ), 3, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ )
    {
        Term * pTerm = *itTerms;
        if(    0 != _tcscmp( _T( "aaaaa" ), pTerm->text()) 
            && 0 != _tcscmp( _T( "aaaab" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaac" ), pTerm->text()))
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }

    clearTermSet( termSet );
    if( rewrite != wildcard )
        _CLDELETE( rewrite );
    _CLDELETE( wildcard );
    

    t1 = _CLNEW Term( _T("data"), _T("aaa*") );
    wildcard = _CLNEW WildcardQuery( t1 );
    rewrite = wildcard->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );

    assertEqualsMsg( _T( "wrong number of terms" ), 5, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ )
    {
        Term * pTerm = *itTerms;
        assertTrueMsg( _T( "wrong term" ), ( 0 == _tcsncmp( _T( "aaa" ), pTerm->text(), 3 )));
    }

    clearTermSet( termSet );
    if( rewrite != wildcard )
        _CLDELETE( rewrite );
    _CLDELETE( wildcard );


    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
    void testEqualScores() 
    {
        // NOTE: uses index build in *this* setUp
        
        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

	    Hits * pResult;

        // some hits match more terms then others, score should be the same
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        pResult = pSearch->search( q );
        size_t numHits = pResult->length();
        assertEqualsMsg( _T( "wrong number of results" ), 6, numHits );
        float_t score = pResult->score( 0 );
        for( size_t i = 1; i < numHits; i++ )
        {
            assertTrueMsg( _T( "score was not the same" ), score == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
    void testBoost()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
	    Hits * pResult;

        // test for correct application of query normalization
        // must use a non score normalizing method for this.
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        q->setBoost( 100 );
        pResult = pSearch->search( q );
        for( size_t i = 1; i < pResult->length(); i++ )
        {
            assertTrueMsg( _T( "score was not was not correct" ), 1.0f == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );


        //
        // Ensure that boosting works to score one clause of a query higher
        // than another.
        //
        Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( .1f );
        Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        BooleanQuery * bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 1, pResult->id( 0 ));
        assertEquals( 0, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( 10.0f );
        q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 0, pResult->id( 0 ));
        assertEquals( 1, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
void TestSpansAdvanced::assertHits( Query * query, const TCHAR * description, const TCHAR ** expectedIds, float_t * expectedScores, size_t expectedCount )
{
    float_t tolerance = 1e-5f;

    QueryUtils::check( tc, query, searcher );

    // Hits hits = searcher.search(query);
    // hits normalizes and throws things off if one score is greater than 1.0
    TopDocs * topdocs = searcher->_search( query, NULL, 10000 );

    /*****
    // display the hits
    System.out.println(hits.length() + " hits for search: \"" + description + '\"');
    for (int i = 0; i < hits.length(); i++) {
        System.out.println("  " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
    }
    *****/

    // did we get the hits we expected
    assertEquals( expectedCount, topdocs->totalHits );
    for( size_t i = 0; i < expectedCount; i++ )
    {
        //System.out.println(i + " exp: " + expectedIds[i]);
        //System.out.println(i + " field: " + hits.doc(i).get(FIELD_ID));

        int32_t id = topdocs->scoreDocs[ i ].doc;
        float_t score = topdocs->scoreDocs[ i ].score;
        Document doc;
        searcher->doc( id, doc );
        assertTrueMsg( _T( "actual document does not match the expected one" ), 0 == _tcscmp( expectedIds[ i ], doc.get( field_id )));
        assertTrueMsg( _T( "score does not match" ), ( expectedScores[ i ] > score ? expectedScores[ i ] - score : score - expectedScores[ i ] ) < tolerance );
        
        Explanation exp;
        searcher->explain( query, id, &exp );
        
        float_t sd = exp.getDetail( 0 )->getValue() - score;
        if ( sd < 0 ) sd *= -1;
        assertTrueMsg( _T( "explained score does not match" ), sd < tolerance );
    }

    _CLLDELETE( topdocs );
}
Beispiel #5
0
void testExtractFromFuzzyQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;
    FuzzyQuery *    fuzzy;
    Term *          t1;
    Query *         rewrite;


    t1 = _CLNEW Term( _T("data"), _T("aaaab") );
    fuzzy = _CLNEW FuzzyQuery( t1, 0.7f );
    rewrite = fuzzy->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );

    assertEqualsMsg( _T( "wrong number of terms" ), 4, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ )
    {
        Term * pTerm = *itTerms;
        if(    0 != _tcscmp( _T( "aaaaa" ), pTerm->text()) 
            && 0 != _tcscmp( _T( "aaaab" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaabb" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaac" ), pTerm->text()))
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }

    clearTermSet( termSet );
    if( rewrite != fuzzy )
        _CLDELETE( rewrite );
    _CLDELETE( fuzzy );
    
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
Beispiel #6
0
void testExtractFromBooleanQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;
 
    Term * t1 = _CLNEW Term( _T("data"), _T("aaaab") );
    Term * t2 = _CLNEW Term( _T("data"), _T("aaabb") );
    Term * t3 = _CLNEW Term( _T("data"), _T("aaabb") );
    BooleanQuery * bq = _CLNEW BooleanQuery();
    bq->add( _CLNEW TermQuery( t1 ), true, BooleanClause::SHOULD );
    bq->add( _CLNEW TermQuery( t2 ), true, BooleanClause::SHOULD );
    bq->add( _CLNEW TermQuery( t3 ), true, BooleanClause::SHOULD );

    Query * rewrite = bq->rewrite( pReader );

    rewrite->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 2, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ )
    {
        Term * pTerm = *itTerms;
        assertTrueMsg( _T( "wrong term" ), ( 0 == t1->compareTo( pTerm ) || 0 == t2->compareTo( pTerm )));
    }
    clearTermSet( termSet );

    _CLLDECDELETE( t1 );
    _CLLDECDELETE( t2 );
    _CLLDECDELETE( t3 );

    if( rewrite != bq )
        _CLDELETE( rewrite );
    _CLDELETE( bq );
    
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}