void testEqualScores() 
    {
        // NOTE: uses index build in *this* setUp
        
        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

	    Hits * pResult;

        // some hits match more terms then others, score should be the same
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        pResult = pSearch->search( q );
        size_t numHits = pResult->length();
        assertEqualsMsg( _T( "wrong number of results" ), 6, numHits );
        float_t score = pResult->score( 0 );
        for( size_t i = 1; i < numHits; i++ )
        {
            assertTrueMsg( _T( "score was not the same" ), score == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
    // Verifies query boosting in two ways:
    //  1. a boosted csrq() query on its own still scores every hit 1.0
    //     (query normalization cancels the boost);
    //  2. inside a BooleanQuery, the clause with the larger boost ranks its
    //     document first.
    void testBoost()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
        IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
        Hits * pResult;

        // test for correct application of query normalization
        // must use a non score normalizing method for this.
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        q->setBoost( 100 );
        pResult = pSearch->search( q );
        // Check every hit, including the top one at index 0.
        for( size_t i = 0; i < pResult->length(); i++ )
        {
            assertTrueMsg( _T( "score was not correct" ), 1.0f == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );


        //
        // Ensure that boosting works to score one clause of a query higher
        // than another.
        //
        Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( .1f );
        Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        BooleanQuery * bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        // q1 is boosted down, so document #1 (from q2) must rank first.
        pResult = pSearch->search( bq );
        assertEquals( 1, pResult->id( 0 ));
        assertEquals( 0, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        // q1/q2 are not deleted separately; only the enclosing BooleanQuery is.
        _CLDELETE( bq );

        q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( 10.0f );
        q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        // q1 is boosted up, so document #0 (from q1) must now rank first.
        pResult = pSearch->search( bq );
        assertEquals( 0, pResult->id( 0 ));
        assertEquals( 1, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Exemplo n.º 3
0
// Interactive search loop: reads query strings from stdin, runs each one
// against the "contents" field of the index at `index`, and prints the path
// and score of every hit. The loop ends on an empty line, EOF or a read error.
//
// @param index  location of the index, passed to IndexReader::open
void SearchFiles(const char* index){
    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);
    while (true) {
        printf("Enter query string: ");

        // fgets yields NULL on EOF or error; retrying would spin forever.
        if ( fgets(line,80,stdin) == NULL )
            break;

        // Strip the trailing newline only when one is actually present
        // (over-long input and EOF without newline have none).
        size_t len = strlen(line);
        if ( len > 0 && line[len-1] == '\n' )
            line[len-1] = 0;

        // An empty line ends the session.
        if ( line[0] == 0 )
            break;

        // Pick up index changes made since the previous query.
        IndexReader* newreader = reader->reopen();
        if ( newreader != reader ){
            _CLLDELETE(reader);
            reader = newreader;
        }
        IndexSearcher s(reader);

        STRCPY_AtoT(tline,line,80);
        Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

        buf = q->toString(_T("contents"));
        _tprintf(_T("Searching for: %s\n\n"), buf);
        _CLDELETE_LCARRAY(buf);

        uint64_t str = Misc::currentTimeMillis();
        Hits* h = s.search(q);
        int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
        str = Misc::currentTimeMillis();

        for ( size_t i=0;i<h->length();i++ ){
            Document* doc = &h->doc(i);
            //const TCHAR* buf = doc.get(_T("contents"));
            // %d expects an int; passing a size_t directly is undefined.
            _tprintf(_T("%d. %s - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
            //print result to web interface:
            LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i));
        }

        printf("\n\nSearch took: %d ms.\n", srch);
        printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

        _CLLDELETE(h);
        _CLLDELETE(q);

        s.close();
    }
    reader->close();
    _CLLDELETE(reader);
}
Exemplo n.º 4
0
// Runs a single query against the "contents" field of the index at `index`
// and prints the path and score of every hit, plus timing information.
//
// @param index     location of the index, passed to IndexReader::open
// @param fobizzle  query string; only the first 79 characters are used
void SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy does not terminate the destination when the source fills it,
    // so copy one byte less and terminate explicitly.
    strncpy(line,fobizzle,sizeof(line)-1);
    line[sizeof(line)-1] = 0;

    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));

    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();

    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int; passing a size_t directly is undefined.
        _tprintf(_T("%d. %S - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);
}
Exemplo n.º 5
0
// Fuzzy-matches the query's full-text string against the "album" field.
//
// Opens the Lucene reader/searcher lazily on first use. Returns a map from
// "albumid" to hit score for every hit scoring above 0.30. The map is empty
// when the index does not exist, and contains only the hits collected so far
// when a CLuceneError is caught.
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        QueryParser parser( _T( "album" ), m_analyzer );

        // escape() returns a heap buffer owned by the caller: copy it into a
        // QString, then release it instead of leaking it on every search.
        TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
        const QString escapedName = QString::fromWCharArray( escaped );
        _CLDELETE_LCARRAY( escaped );

        Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
        Hits* hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();

            // Only keep reasonably close matches.
            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }

        delete hits;
        delete qry;
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    return resultsmap;
}
Exemplo n.º 6
0
    // Runs the search described by the baton carried in req->data.
    //
    // Opens the index named by baton->index, parses baton->search and stores
    // an array of {"id": ..., "score": ...} objects in baton->results. On
    // failure the message is stored in baton->error instead. Always returns 0.
    //
    // NOTE(review): this creates V8 handles; if it runs on a libeio worker
    // thread rather than the V8 thread that is unsafe - confirm with caller.
    static int EIO_Search(eio_req* req) 
    {
        search_baton_t* baton = static_cast<search_baton_t*>(req->data);

        standard::StandardAnalyzer analyzer;
        IndexReader* reader = 0;
        try {
            reader = IndexReader::open(*(*baton->index));
            // reopen() is inside the try so its failures are reported too.
            IndexReader* newreader = reader->reopen();
            if ( newreader != reader ) {
                delete reader;
                reader = newreader;
            }
        } catch (CLuceneError& E) {
          baton->error.assign(E.what());
          delete reader;
          return 0;
        } catch(...) {
          baton->error = "Got an unknown exception";
          delete reader;
          return 0;
        }

        // Declared outside the try so they are released on every path.
        TCHAR* searchString = 0;
        Query* q = 0;
        Hits* hits = 0;
        {
            // Scoped so the searcher is gone before the reader is deleted.
            IndexSearcher s(reader);

            try {
                searchString = STRDUP_AtoT(*(*baton->search));
                q = QueryParser::parse(searchString, _T(""), &analyzer);
                hits = s.search(q);

                HandleScope scope;
                // Build the result array
                Local<v8::Array> resultArray = v8::Array::New();
                for (size_t i=0; i < hits->length(); i++) {
                    Document& doc(hits->doc(i));
                    // {"id":"ab34", "score":1.0}
                    Local<Object> resultObject = Object::New();
                    // String::New copies the bytes, so the duplicate can be
                    // freed right after the V8 string has been created.
                    char* id = STRDUP_TtoA(doc.get(_T("_id")));
                    resultObject->Set(String::New("id"), String::New(id));
                    free(id);
                    resultObject->Set(String::New("score"), Number::New(hits->score(i)));
                    resultArray->Set(i, resultObject);
                }
                baton->results = Persistent<v8::Array>::New(resultArray);
            } catch (CLuceneError& E) {
              baton->error.assign(E.what());
            } catch(...) {
              baton->error = "Got an unknown exception";
            }

            delete hits;
            delete q;
            free(searchString);
        }

        delete reader;
        return 0;
    }
Exemplo n.º 7
0
// Executes `q` and returns the hits in the window [off, off + max).
//
// @param q    query to run; a single empty field with a term starting with
//             "strigispecial:" is forwarded to Private::strigiSpecial
// @param off  index of the first hit to return; negative values mean 0
// @param max  number of hits to return; the window is clamped to the number
//             of hits, and a negative window end selects everything up to
//             the last hit
// @return one IndexedDocument (score plus fields) per selected hit; empty if
//         the reader is unavailable or the search failed
vector<IndexedDocument>
CLuceneIndexReader::query(const Strigi::Query& q, int off, int max) {
    vector<IndexedDocument> results;
    if (!checkReader()) {
        return results;
    }
    // handle special commands
    if (q.fields().size() == 1 && q.fields()[0].empty()
            && q.term().string().substr(0, 14) == "strigispecial:") {
        return p->strigiSpecial(q.term().string());
    }

    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        // s stays 0, so the loop below never touches the null `hits`.
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    if (off < 0) off = 0;

    // Exclusive end of the window. Computed without ever evaluating an
    // overflowing `off + max`: a huge `max` is clamped to the hit count first.
    int end;
    if (max >= 0 && max > s - off) {
        end = s;
    } else {
        end = off + max;
        if (end < 0) end = s;
        if (end > s) end = s;
    }

    if (end > off) {
        results.reserve(end-off);
    }
    for (int i = off; i < end; ++i) {
        Document *d = &hits->doc(i);
        IndexedDocument doc;
        doc.score = hits->score(i);
        // Copy every stored field of the hit into the result document.
        DocumentFieldEnumeration* e = d->fields();
        while (e->hasMoreElements()) {
            Field* f = e->nextElement();
            Private::addField(f, doc);
        }
        results.push_back(doc);
        _CLDELETE(e);
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
    return results;
}
Exemplo n.º 8
0
// Fuzzy track search.
//
// For a full-text query the text is matched against the "track", "artist"
// and "fulltext" fields (any may match). Otherwise the query's track and
// artist are matched against the "track" and "artist" fields (both must
// match). Opens the Lucene reader/searcher lazily on first use.
//
// Returns a map from "trackid" to hit score for every hit scoring above the
// minimum score. The map is empty when the index does not exist, and holds
// only the hits collected so far when a CLuceneError is caught.
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        float minScore;
        // The parser is only used for escape(); no default fields are needed.
        const TCHAR** fields = 0;
        MultiFieldQueryParser parser( fields, m_analyzer );
        BooleanQuery* qry = _CLNEW BooleanQuery();

        // NOTE(review): each Term below is handed to a FuzzyQuery and never
        // released here - confirm whether FuzzyQuery takes its own reference.
        if ( query->isFullTextQuery() )
        {
            // escape() returns a heap buffer owned by the caller: copy it
            // into a QString, then release it instead of leaking it.
            TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
            const QString escapedQuery = QString::fromWCharArray( escaped );
            _CLDELETE_LCARRAY( escaped );

            Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            minScore = 0.00;
        }
        else
        {
            // Same ownership rule as above: free each escape() buffer.
            TCHAR* escapedTrack = parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() );
            const QString track = QString::fromWCharArray( escapedTrack );
            _CLDELETE_LCARRAY( escapedTrack );

            TCHAR* escapedArtist = parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() );
            const QString artist = QString::fromWCharArray( escapedArtist );
            _CLDELETE_LCARRAY( escapedArtist );
//            QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );

            Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::MUST );

            term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::MUST );

            minScore = 0.00;
        }

        Hits* hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }

        delete hits;
        delete qry;
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    return resultsmap;
}
Exemplo n.º 9
0
// Runs a single query against the "contents" field of the index at `index`,
// prints every hit, and returns the hits as a heap-allocated array.
//
// The number of entries is stored in `arrsize`. Each entry's `path` points to
// a separately allocated "<path> <score>" string (truncated to 99 chars).
//
// @param index     location of the index, passed to IndexReader::open
// @param fobizzle  query string; only the first 79 characters are used
// @return array allocated with new[]; the caller owns it
SearchData *SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy does not terminate the destination when the source fills it,
    // so copy one byte less and terminate explicitly.
    strncpy(line,fobizzle,sizeof(line)-1);
    line[sizeof(line)-1] = 0;

    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));

    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();

    const size_t hitCount = h->length();
    arrsize = hitCount;
    SearchData *search = new SearchData[hitCount];
    for ( size_t i=0; i < hitCount; i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int; passing a size_t directly is undefined.
        _tprintf(_T("%d. %S - %f\n"), (int)i, doc->get(_T("path")), h->score(i));

        // Bounded formatting: a long path must not overrun the entry buffer.
        const size_t entrySize = 100;
        char *entry = new char[entrySize];
        entry[0] = 0;
        snprintf(entry, entrySize, "%S %f", doc->get(_T("path")), h->score(i));
        search[i].path = entry;
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    // Every entry holds its own copy of the text, so the Lucene objects can
    // be released here instead of being leaked on each call.
    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);

    return search;
}
Exemplo n.º 10
0
// Fuzzy search for `name` in field `table`.
//
// With `fulltext` set, each word of the escaped name becomes its own fuzzy
// term ("table:word1~ word2~"); otherwise the words form one quoted phrase
// followed by "~". Opens the Lucene reader/searcher lazily on first use.
//
// Returns a map from the hit's "id" field to a score: 1.0 when the stored
// value equals `name` after DatabaseImpl::sortname, otherwise the hit score
// capped at 0.99. Only scores above 0.20 are kept. The map is empty when the
// index does not exist or `name` is empty, and holds only the hits collected
// so far when a CLuceneError is caught.
QMap< int, float >
FuzzyIndex::search( const QString& table, const QString& name, bool fulltext )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        if ( name.isEmpty() )
            return resultsmap;

        Hits* hits = 0;
        Query* qry = 0;
        QueryParser parser( table.toStdWString().c_str(), m_analyzer );

        // escape() returns a heap buffer owned by the caller: copy it into a
        // QString, then release it instead of leaking it on every search.
        TCHAR* escaped = parser.escape( name.toStdWString().c_str() );
        const QString escapedName = QString::fromWCharArray( escaped );
        _CLDELETE_LCARRAY( escaped );

        const QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts );

        if ( fulltext )
        {
            qry = parser.parse( QString( "%1:%2~" ).arg( table ).arg( sl.join( "~ " ) ).toStdWString().c_str() );
        }
        else
        {
//            qry = _CLNEW FuzzyQuery( _CLNEW Term( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str() ) );
            qry = parser.parse( QString( "%1:\"%2\"~" ).arg( table ).arg( sl.join( " " ) ).toStdWString().c_str() );
        }

        // Loop-invariant: the normalized search name is the same for all hits.
        const QString sortedName = DatabaseImpl::sortname( name );

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "id" ) ) ).toInt();
            QString result = QString::fromWCharArray( d->get( table.toStdWString().c_str() ) );

            // Exact matches get the top score; everything else stays below it.
            if ( DatabaseImpl::sortname( result ) == sortedName )
                score = 1.0;
            else
                score = qMin( score, (float)0.99 );

            if ( score > 0.20 )
            {
                resultsmap.insert( id, score );
//                qDebug() << "Hitres:" << result << id << score << table << name;
            }
        }

        delete hits;
        delete qry;
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    return resultsmap;
}