Beispiel #1
0
	/**
	 * Parses qry against the "contents" field, runs it through the given
	 * searcher and, for the first hit (and the second one, if present), builds
	 * an Explanation plus its string form so that memory leaks in the
	 * explanation code show up.
	 * A CLuceneError or any other exception is reported through CuFail.
	 */
	void _TestSearchesRun(CuTest *tc, Analyzer* analyzer, Searcher* search, const TCHAR* qry){
		Query* parsed = NULL;
		Hits* hits = NULL;
		try{
			parsed = QueryParser::parse(qry , _T("contents"), analyzer);
			if ( parsed != NULL ){
				hits = search->search( parsed );

				// check for explanation memory leaks: explain at most the
				// first two hits (the second one just in case)
				for ( int hitNo = 0; hitNo < 2 && hits->length() > hitNo; hitNo++ ){
					CL_NS(search)::Explanation explanation;
					search->explain(parsed, hits->id(hitNo), &explanation);
					TCHAR* text = explanation.toString();
					_CLDELETE_CARRAY(text);
				}
			}
		}catch(CLuceneError& err){
			CuFail(tc,_T("Error: %s\n"), err.twhat());
		}catch(...){
			CuFail(tc,_T("Error: unknown\n"));
		}
		_CLDELETE(hits);
		_CLDELETE(parsed);
	}
/**
 * Indexes three documents whose "Category" field holds "a 1", "a 2" and "a 3"
 * into a RAM directory, then checks that
 *  - the term query for "a" finds all three documents,
 *  - the term query for "3" finds one document,
 *  - the term query for "a" combined with RangeFilter("Category", "3", "3",
 *    true, true) finds one document.
 */
void testIncludeLowerTrue(CuTest* tc)
{
    WhitespaceAnalyzer analyzer;
    RAMDirectory* ramDir = _CLNEW RAMDirectory();

    // Build the index: one document per category value, reusing the same
    // Document object between additions.
    IndexWriter* indexWriter = _CLNEW IndexWriter(ramDir, &analyzer, true);
    const TCHAR* categories[] = { _T("a 1"), _T("a 2"), _T("a 3") };
    Document doc;
    for (size_t n = 0; n < sizeof(categories) / sizeof(categories[0]); ++n) {
        doc.add(*_CLNEW Field(_T("Category"), categories[n], Field::STORE_YES | Field::INDEX_TOKENIZED));
        indexWriter->addDocument(&doc);
        doc.clear();
    }
    indexWriter->close();
    _CLLDELETE(indexWriter);

    IndexSearcher* searcher = _CLNEW IndexSearcher(ramDir);
    Filter* rangeFilter = _CLNEW RangeFilter(_T("Category"), _T("3"), _T("3"), true, true);

    // The local Term reference is dropped right after each TermQuery is built.
    Term* term = _CLNEW Term(_T("Category"), _T("a"));
    Query* q1 = _CLNEW TermQuery(term);
    _CLLDECDELETE(term);

    term = _CLNEW Term(_T("Category"), _T("3"));
    Query* q2 = _CLNEW TermQuery(term);
    _CLLDECDELETE(term);

    // unfiltered query for "a"
    Hits* h = searcher->search(q1);
    assertTrue(h->length() == 3);
    _CLLDELETE(h);

    // unfiltered query for "3"
    h = searcher->search(q2);
    assertTrue(h->length() == 1);
    _CLLDELETE(h);

    // query for "a", restricted by the range filter
    h = searcher->search(q1, rangeFilter);
    assertTrue(h->length() == 1);
    _CLLDELETE(h);

    searcher->close();
    _CLLDELETE(searcher);
    _CLLDELETE(q1);
    _CLLDELETE(q2);
    _CLLDELETE(rangeFilter);

    ramDir->close();
    _CLLDECDELETE(ramDir);
}
Beispiel #3
0
/**
 * Looks up a lexicon entry by key in the CLucene index.
 *
 * The index is built first when hasIndex() reports that none exists. The
 * query "key:<key>" is parsed with "content" as the default field, and the
 * "content" field of every hit is concatenated, separated by "<hr /> ".
 *
 * \param key The key of the entry.
 * \return A newly allocated StringResponse holding the concatenated content,
 *         the translated "Cannot build index." / "Nothing found for ..."
 *         message, or an empty string when an exception was thrown.
 */
Response * ZefaniaLex::getEntry(const QString &key)
{
    // Declared outside the try block so that they can be released on every
    // path, including when CLucene throws half-way through the search.
    IndexReader* reader = nullptr;
    Query* q = nullptr;
    Hits* h = nullptr;
    Response* response = nullptr;
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return new StringResponse(QObject::tr("Cannot build index."));
            }
        }
        const QString index = indexPath();
        const QString queryText = "key:" + key;
        const TCHAR* stop_words[] = { nullptr };
        standard::StandardAnalyzer analyzer(stop_words);
        reader = IndexReader::open(index.toStdString().c_str());
        IndexSearcher s(reader);

        q = QueryParser::parse(SearchTools::toTCHAR(queryText), _T("content"), &analyzer);
        QString ret = "";
        // A query that could not be parsed yields no hits instead of a crash.
        if(q != nullptr) {
            h = s.search(q);
            for(size_t i = 0; i < h->length(); i++) {
                Document* doc = &h->doc(i);
                if(!ret.isEmpty())
                    ret.append("<hr /> ");
                ret.append(SearchTools::toQString(doc->get(_T("content"))));
            }
            // Release the hits while the searcher that produced them is alive.
            delete h;
            h = nullptr;
        }
        if(ret.isEmpty())
            response = new StringResponse(QObject::tr("Nothing found for %1").arg(key));
        else
            response = new StringResponse(ret);
    } catch(...) {
        response = new StringResponse(QString());
    }

    // The searcher was handed a reader it does not own, so the reader, the
    // query and any hits left over from an exception are released here.
    delete h;
    delete q;
    if(reader != nullptr) {
        try {
            reader->close();
        } catch(...) {
            // best effort: a failing close must not replace the response
        }
        delete reader;
    }
    return response;
}
    void testEqualScores() 
    {
        // NOTE: uses index build in *this* setUp
        
        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

	    Hits * pResult;

        // some hits match more terms then others, score should be the same
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        pResult = pSearch->search( q );
        size_t numHits = pResult->length();
        assertEqualsMsg( _T( "wrong number of results" ), 6, numHits );
        float_t score = pResult->score( 0 );
        for( size_t i = 1; i < numHits; i++ )
        {
            assertTrueMsg( _T( "score was not the same" ), score == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
    void testBoost()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
	    Hits * pResult;

        // test for correct application of query normalization
        // must use a non score normalizing method for this.
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        q->setBoost( 100 );
        pResult = pSearch->search( q );
        for( size_t i = 1; i < pResult->length(); i++ )
        {
            assertTrueMsg( _T( "score was not was not correct" ), 1.0f == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );


        //
        // Ensure that boosting works to score one clause of a query higher
        // than another.
        //
        Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( .1f );
        Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        BooleanQuery * bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 1, pResult->id( 0 ));
        assertEquals( 0, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( 10.0f );
        q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 0, pResult->id( 0 ));
        assertEquals( 1, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #6
0
/**
 * Runs one query against the index at `index` and prints the hits.
 *
 * The query text is copied into an 80-character buffer (longer input is
 * truncated), parsed with "contents" as the default field, and every hit is
 * printed with its position, its "path" field and its score, followed by
 * the time spent searching and printing.
 *
 * @param index    location of the index, passed to IndexReader::open
 * @param fobizzle query string to search for
 */
void SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy leaves the destination unterminated when the source fills the
    // whole buffer, so copy one character less and terminate explicitly.
    strncpy(line,fobizzle,sizeof(line)-1);
    line[sizeof(line)-1]=0;

    // Switch to a fresh reader if the index changed since it was opened.
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));

    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();
    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int; passing the size_t index unconverted is undefined
        _tprintf(_T("%d. %S - %f\n"), static_cast<int>(i), doc->get(_T("path")), h->score(i));
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);
}
    void testBooleanOrderUnAffected()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

        // first do a regular RangeQuery which uses term expansion so
        // docs with more terms in range get higher scores
        Term * pLower = _CLNEW Term( _T( "data" ), _T( "1" ));
        Term * pUpper = _CLNEW Term( _T( "data" ), _T( "4" ));
        Query * rq = _CLNEW RangeQuery( pLower, pUpper, true );
        _CLLDECDELETE( pUpper );
        _CLLDECDELETE( pLower );

        Hits * pExpected = pSearch->search( rq );
        size_t numHits = pExpected->length();
 
        // now do a boolean where which also contains a
        // ConstantScoreRangeQuery and make sure the order is the same
        
        BooleanQuery * q = _CLNEW BooleanQuery();
        q->add( rq, true, BooleanClause::MUST );
        q->add( csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true ), true, BooleanClause::MUST );
 
        Hits * pActual = pSearch->search( q );
        assertEqualsMsg( _T( "wrong number of hits" ), numHits, pActual->length() );
        for( size_t i = 0; i < numHits; i++ )
        {
            assertEqualsMsg( _T( "mismatch in docid for a hit" ), pExpected->id( i ), pActual->id( i ));
        }
        _CLDELETE( pActual );
        _CLDELETE( pExpected );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #8
0
/**
 * Interactive search loop over the index at `index`.
 *
 * Reads one query per line from stdin, parses it with "contents" as the
 * default field and prints every hit (position, "path" field, score) plus
 * timing information. The loop ends on an empty line, at end of input, or
 * on a read error.
 *
 * @param index location of the index, passed to IndexReader::open
 */
void SearchFiles(const char* index){
    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);
    while (true) {
        printf("Enter query string: ");
        // fgets keeps returning NULL once stdin hits EOF or an error, so
        // retrying here would spin forever; leave the loop instead.
        if ( fgets(line,80,stdin) == NULL )
            break;
        // Strip the trailing newline only when it is really there: a line
        // that filled the buffer, or the last line of a file, has none.
        size_t len = strlen(line);
        if ( len > 0 && line[len-1] == '\n' )
            line[len-1]=0;

        // Switch to a fresh reader if the index changed since the last query.
        IndexReader* newreader = reader->reopen();
        if ( newreader != reader ){
            _CLLDELETE(reader);
            reader = newreader;
        }
        IndexSearcher s(reader);

        if ( strlen(line) == 0 )
            break;
        STRCPY_AtoT(tline,line,80);
        Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

        buf = q->toString(_T("contents"));
        _tprintf(_T("Searching for: %s\n\n"), buf);
        _CLDELETE_LCARRAY(buf);

        uint64_t str = Misc::currentTimeMillis();
        Hits* h = s.search(q);
        int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
        str = Misc::currentTimeMillis();

        for ( size_t i=0;i<h->length();i++ ){
            Document* doc = &h->doc(i);
            // %d expects an int; passing the size_t index unconverted is undefined
            _tprintf(_T("%d. %s - %f\n"), static_cast<int>(i), doc->get(_T("path")), h->score(i));
            //print result to web interface:
            LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i));
        }

        printf("\n\nSearch took: %d ms.\n", srch);
        printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

        _CLLDELETE(h);
        _CLLDELETE(q);

        s.close();
    }
    reader->close();
    _CLLDELETE(reader);
}
Beispiel #9
0
/**
 * Fuzzy-searches the "album" field for the full-text part of `query`.
 *
 * The reader and searcher are opened on first use; when the index does not
 * exist yet, an empty map is returned. Hits with a score above 0.30 are
 * returned as a map from the hit's "albumid" field (as int) to its score.
 * A CLuceneError is logged and asserted on; the results gathered up to that
 * point are returned.
 */
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so they are released even when CLucene throws.
    Query* qry = 0;
    Hits* hits = 0;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        QueryParser parser( _T( "album" ), m_analyzer );
        // escape() hands back a buffer owned by the caller; copy it into a
        // QString and release it instead of leaking it on every search.
        TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
        QString escapedName = QString::fromWCharArray( escaped );
        _CLDELETE_CARRAY( escaped );

        // The query keeps its own reference to the term, so drop ours once
        // the query has been built.
        Term* term = _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() );
        qry = _CLNEW FuzzyQuery( term );
        _CLDECDELETE( term );

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}
Beispiel #10
0
    /**
     * Worker callback that runs the baton's search string against the
     * baton's index.
     *
     * On success baton->results receives an array of {"id", "score"}
     * objects, one per hit ("id" is the hit's "_id" field). On failure
     * baton->error receives the CLucene error text or a generic message.
     * Always returns 0.
     */
    static int EIO_Search(eio_req* req) 
    {
        search_baton_t* baton = static_cast<search_baton_t*>(req->data);

        standard::StandardAnalyzer analyzer;
        // Declared outside the try block so that every path below, including
        // the exception paths, releases them.
        IndexReader* reader = 0;
        TCHAR* searchString = 0;
        Query* q = 0;
        Hits* hits = 0;
        try {
            reader = IndexReader::open(*(*baton->index));

            // Switch to a fresh reader if the index changed since it was opened.
            IndexReader* newreader = reader->reopen();
            if ( newreader != reader ) {
                delete reader;
                reader = newreader;
            }
            IndexSearcher s(reader);

            searchString = STRDUP_AtoT(*(*baton->search));
            q = QueryParser::parse(searchString, _T(""), &analyzer);
            hits = s.search(q);

            HandleScope scope;
            // Build the result array
            Local<v8::Array> resultArray = v8::Array::New();
            for (size_t i=0; i < hits->length(); i++) {
                Document& doc(hits->doc(i));
                // {"id":"ab34", "score":1.0}
                Local<Object> resultObject = Object::New();
                // String::New copies the characters, so the converted id can
                // be released as soon as the property has been set.
                char* id = STRDUP_TtoA(doc.get(_T("_id")));
                resultObject->Set(String::New("id"), String::New(id));
                free(id);
                resultObject->Set(String::New("score"), Number::New(hits->score(i)));
                resultArray->Set(i, resultObject);
            }
            baton->results = Persistent<v8::Array>::New(resultArray);

            // Release the hits while the searcher that produced them is alive.
            delete hits;
            hits = 0;
        } catch (CLuceneError& E) {
          baton->error.assign(E.what());
        } catch(...) {
          baton->error = "Got an unknown exception";
        }

        // The query must outlive the hits, so it is only released here.
        free(searchString);
        delete hits;
        delete q;
        if ( reader != 0 ) {
            try {
                reader->close();
            } catch(...) {
                // best effort: a failing close must not escape the callback
            }
            delete reader;
        }

        return 0;
    }
/**
 * Builds a histogram over the integer values of `fieldname` in all
 * documents that match `query`.
 *
 * Returns an empty vector when the reader is unavailable or when any
 * matching document holds a value in that field that is not a plain
 * base-10 integer. When the field is the mtime field or `labeltype` is
 * "time", the values go to makeTimeHistogram(); otherwise they go to
 * makeHistogram() together with their minimum and maximum.
 */
vector<pair<string,uint32_t> >
CLuceneIndexReader::histogram(const string& query,
        const string& fieldname, const string& labeltype) {
    vector<pair<string,uint32_t> > h;
    if (!checkReader()) {
        return h;
    }
    Strigi::QueryParser parser;
    Strigi::Query q = parser.buildQuery(query);
    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    wstring field = utf8toucs2(fieldname);
    int32_t max = INT_MIN;
    int32_t min = INT_MAX;
    vector<int32_t> values;
    values.reserve(s);
    bool allNumeric = true;
    for (int i = 0; i < s; ++i) {
        Document *d = &hits->doc(i);
        const TCHAR* v = d->get(field.c_str());
        if (v) {
            // Keep the converted string in a local: strtol stores a pointer
            // into it in `end`, which must still be valid when it is read.
            const string text = wchartoutf8( v );
            char* end = 0;
            int val = (int)strtol(text.c_str(), &end, 10);
            if ( *end != 0) {
                // trailing non-digit characters: not a numeric field
                allNumeric = false;
                break;
            }
            values.push_back(val);
            max = (max>val) ?max :val;
            min = (min<val) ?min :val;
        }
    }
    // Single cleanup path, so the query is also released when the loop
    // above gave up early.
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
    if (!allNumeric) {
        return h;
    }
    if (fieldname == FieldRegister::mtimeFieldName || labeltype == "time") {
        return makeTimeHistogram(values);
    } else {
        return makeHistogram(values, min, max);
    }
}
/**
 * Counts the documents that match `q`.
 *
 * @return -1 when checkReader() fails, the total number of documents for an
 *         empty query, 0 when no reader is set or the search throws, and
 *         the number of hits otherwise.
 */
int32_t
CLuceneIndexReader::countHits(const Strigi::Query& q) {
    if (!checkReader()) return -1;
    // if the query is empty, we return the number of files in the index
    if (q.term().string().size() == 0 && q.subQueries().size() == 0) {
        return countDocuments();
    }

    // Check the reader before the query is created, so that this early
    // return cannot leak the query object.
    if (reader == 0) {
        return 0;
    }
    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
/*        HitCounter counter;
        QueryFilter* filter = _CLNEW QueryFilter(&bq);
        try {
        BitSet* bits = filter->bits(reader);
        int32_t n = bits->size();
        for (int32_t i=0; i<n; ++i) {
            if (bits->get(i)) s++;
        }
        } catch (CLuceneError& err2) {
            printf("ccould not query: %s\n", err.what());
        }
        try {
            searcher._search(0, filter, &counter);
        } catch (CLuceneError& err2) {
            printf("ccould not query: %s\n", err.what());
        }
        s = counter.count();

        printf("counted %i hits\n", count);
        // try to do a constant score query
        //QueryFilter* filter = _CLNEW QueryFilter(&bq);
        ConstantScoreQuery csq(filter);*/
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    delete hits;
    searcher.close();
    _CLDELETE(bq);
    return s;
}
/**
 * Runs `q` and returns a window of the matching documents.
 *
 * @param q   the query; a single empty field name combined with a term
 *            starting with "strigispecial:" is passed to strigiSpecial()
 * @param off index of the first hit to return (negative values count as 0)
 * @param max number of hits to return after `off`; when off + max is
 *            negative or does not fit in an int, all remaining hits are
 *            returned
 * @return one IndexedDocument per selected hit, holding its score and
 *         fields; empty when the reader is unavailable or the search throws
 */
vector<IndexedDocument>
CLuceneIndexReader::query(const Strigi::Query& q, int off, int max) {
    vector<IndexedDocument> results;
    if (!checkReader()) {
        return results;
    }
    // handle special commands
    if (q.fields().size() == 1 && q.fields()[0].empty()
            && q.term().string().substr(0, 14) == "strigispecial:") {
        return p->strigiSpecial(q.term().string());
    }

    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    if (off < 0) off = 0;
    // Turn `max` from a count into an exclusive end index. off is >= 0 here,
    // so INT_MAX - off cannot overflow; the check keeps off + max from
    // overflowing a signed int, which would be undefined behavior.
    if (max > INT_MAX - off) {
        max = s;
    } else {
        max += off;
        if (max < 0) max = s;
    }
    if (max > s) max = s;
    if (max > off) {
        results.reserve(max-off);
    }
    // When the search failed, s is 0 and so is max: the loop does not run
    // and the null `hits` pointer is never dereferenced.
    for (int i = off; i < max; ++i) {
        Document *d = &hits->doc(i);
        IndexedDocument doc;
        doc.score = hits->score(i);
        DocumentFieldEnumeration* e = d->fields();
        while (e->hasMoreElements()) {
            Field* f = e->nextElement();
            Private::addField(f, doc);
        }
        results.push_back(doc);
        _CLDELETE(e);
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
    return results;
}
void
CLuceneIndexReader::getChildren(const std::string& parent,
            std::map<std::string, time_t>& children) {
    children.clear();
    // force a fresh reader. This is important because the function
    // getChildren is essential for updating the index
    if ( !checkReader(true) ) {
        return;
    }
    // build a query
    Term* t = Private::createKeywordTerm(Private::parentlocation(),
        parent);
    Query* q = _CLNEW TermQuery(t);
    _CLDECDELETE(t);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int nhits = 0;
    try {
        hits = searcher.search(q);
        nhits = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    const TCHAR* mtime = mapId(Private::mtime());
    for (int i = 0; i < nhits; ++i) {
        Document* d = &hits->doc(i);

        const TCHAR* v = d->get(mtime);
        // check that mtime is defined for this document
        if (v) {
            time_t mtime = atoi(wchartoutf8( v ).c_str());
            v = d->get(Private::systemlocation());
            if (v) {
                children[wchartoutf8( v )] = mtime;
            }
        }

    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(q);
}
Beispiel #15
0
/// TestBooleanScorer.java, ported 5/9/2009
///
/// Indexes four documents whose "category" field holds "1".."4", then runs
/// the boolean query  MUST( SHOULD("1") SHOULD("2") ) MUST_NOT("9")  and
/// expects exactly two matching documents. A CLuceneError fails the test.
void testBooleanScorer(CuTest *tc) {
    const TCHAR* FIELD = _T("category");
    RAMDirectory directory;

    // String literals are const; a NULL entry terminates the list.
    const TCHAR* values[] = { _T("1"), _T("2"), _T("3"), _T("4"), NULL};

    try {
        WhitespaceAnalyzer a;
        IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true);
        for (size_t i = 0; values[i]!=NULL; i++) {
            Document* doc = _CLNEW Document();
            doc->add(*_CLNEW Field(FIELD, values[i], Field::STORE_YES | Field::INDEX_TOKENIZED));
            writer->addDocument(doc);
            _CLLDELETE(doc);
        }
        writer->close();
        _CLLDELETE(writer);

        // Inner query: "1" OR "2". The local term reference is dropped
        // after each TermQuery has been built.
        BooleanQuery* booleanQuery1 = _CLNEW BooleanQuery();
        Term *t = _CLNEW Term(FIELD, _T("1"));
        booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD);
        _CLDECDELETE(t);
        t = _CLNEW Term(FIELD, _T("2"));
        booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD);
        _CLDECDELETE(t);

        // Outer query: the inner query is required, "9" is prohibited.
        BooleanQuery* query = _CLNEW BooleanQuery();
        query->add(booleanQuery1, true, BooleanClause::MUST);
        t = _CLNEW Term(FIELD, _T("9"));
        query->add(_CLNEW TermQuery(t), true, BooleanClause::MUST_NOT);
        _CLDECDELETE(t);

        IndexSearcher *indexSearcher = _CLNEW IndexSearcher(&directory);
        Hits *hits = indexSearcher->search(query);
        CLUCENE_ASSERT(2 == hits->length()); // Number of matched documents
        _CLLDELETE(hits);
        _CLLDELETE(indexSearcher);

        _CLLDELETE(query);
    }
    catch (CLuceneError& e) {
        CuFail(tc, e.twhat());
    }
}
Beispiel #16
0
/**
 * Looks up a lexicon entry by key in the CLucene index.
 *
 * The index is built first when hasIndex() reports that none exists. The
 * query "key:<key>" is parsed with "content" as the default field, and the
 * "content" field of every hit is concatenated, separated by "<hr /> ".
 *
 * \param key The key of the entry.
 * \return The concatenated content, the translated "Cannot build index." /
 *         "Nothing found for ..." message, or a null QString when an
 *         exception was thrown.
 */
QString ZefaniaLex::getEntry(const QString &key)
{
    // Declared outside the try block so that they can be released on every
    // path, including when CLucene throws half-way through the search.
    IndexReader* reader = NULL;
    Query* q = NULL;
    Hits* h = NULL;
    QString result;
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QObject::tr("Cannot build index.");
            }
        }
        const QString index = indexPath();
        const QString queryText = "key:" + key;
        const TCHAR* stop_words[] = { NULL };
        standard::StandardAnalyzer analyzer(stop_words);
        reader = IndexReader::open(index.toStdString().c_str());
        IndexSearcher s(reader);
    #ifdef OBV_USE_WSTRING
        q = QueryParser::parse(queryText.toStdWString().c_str(), _T("content"), &analyzer);
    #else
        q = QueryParser::parse(reinterpret_cast<const wchar_t *>(queryText.utf16()), _T("content"), &analyzer);
    #endif
        QString ret = "";
        // A query that could not be parsed yields no hits instead of a crash.
        if(q != NULL) {
            h = s.search(q);
            for(size_t i = 0; i < h->length(); i++) {
                Document* doc = &h->doc(i);
                if(!ret.isEmpty())
                    ret.append("<hr /> ");
    #ifdef OBV_USE_WSTRING
                ret.append(QString::fromWCharArray(doc->get(_T("content"))));
    #else
                ret.append(QString::fromUtf16((const ushort*)doc->get(_T("content"))));
    #endif
            }
            // Release the hits while the searcher that produced them is alive.
            delete h;
            h = NULL;
        }
        result = ret.isEmpty() ? QObject::tr("Nothing found for %1").arg(key) : ret;
    } catch(...) {
        result = QString();
    }

    // The searcher was handed a reader it does not own, so the reader, the
    // query and any hits left over from an exception are released here.
    delete h;
    delete q;
    if(reader != NULL) {
        try {
            reader->close();
        } catch(...) {
            // best effort: a failing close must not replace the result
        }
        delete reader;
    }
    return result;
}
Beispiel #17
0
/**
 * Fuzzy-searches the track index for `query`.
 *
 * A full-text query is matched against the "track", "artist" and "fulltext"
 * fields (any of them may match). Otherwise the query's track and artist
 * are matched against the "track" and "artist" fields (both must match).
 * The reader and searcher are opened on first use; when the index does not
 * exist yet, an empty map is returned.
 *
 * @return map from the hit's "trackid" field (as int) to its score, for
 *         all hits scoring above the minimum score (currently 0). A
 *         CLuceneError is logged and asserted on; the results gathered up
 *         to that point are returned.
 */
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so they are released even when CLucene throws.
    BooleanQuery* qry = 0;
    Hits* hits = 0;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        float minScore;
        const TCHAR** fields = 0;
        // The parser is only used for escape() below.
        MultiFieldQueryParser parser( fields, m_analyzer );
        qry = _CLNEW BooleanQuery();

        // Notes for both branches:
        //  - escape() hands back a buffer owned by the caller; it is copied
        //    into a QString and released right away.
        //  - each FuzzyQuery keeps its own reference to its term, so the
        //    local reference is dropped once the query has been built.
        if ( query->isFullTextQuery() )
        {
            TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
            QString escapedQuery = QString::fromWCharArray( escaped );
            _CLDELETE_CARRAY( escaped );

            Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            minScore = 0.00;
        }
        else
        {
            TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() );
            QString track = QString::fromWCharArray( escaped );
            _CLDELETE_CARRAY( escaped );

            escaped = parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() );
            QString artist = QString::fromWCharArray( escaped );
            _CLDELETE_CARRAY( escaped );
//            QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );

            Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::MUST );

            term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::MUST );

            minScore = 0.00;
        }

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}
    void testRangeFilterId() {

        IndexReader* reader = IndexReader::open(index);
        IndexSearcher* search = new IndexSearcher(reader);

        int medId = ((maxId - minId) / 2);

        std::tstring minIPstr = pad(minId);
        const TCHAR* minIP = minIPstr.c_str();

        std::tstring maxIPstr = pad(maxId);
        const TCHAR* maxIP = maxIPstr.c_str();

        std::tstring medIPstr = pad(medId);
        const TCHAR* medIP = medIPstr.c_str();

        size_t numDocs = static_cast<size_t>(reader->numDocs());

        assertEqualsMsg(_T("num of docs"), numDocs, static_cast<size_t>(1+ maxId - minId));

        Hits* result;
        Term* term = _CLNEW Term(_T("body"),_T("body"));
        Query* q = _CLNEW TermQuery(term);
        _CLDECDELETE(term);

        // test id, bounded on both ends

        Filter* f = _CLNEW RangeFilter(_T("id"),minIP,maxIP,T,T);
        result = search->search(q, f);
        assertEqualsMsg(_T("find all"), numDocs, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,maxIP,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but last"), numDocs-1, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f =_CLNEW RangeFilter(_T("id"),minIP,maxIP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but first"), numDocs-1, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,maxIP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but ends"), numDocs-2, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),medIP,maxIP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("med and up"), 1+ maxId-medId, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,medIP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("up to med"), 1+ medId-minId, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        // unbounded id

        f=_CLNEW RangeFilter(_T("id"),minIP,NULL,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("min and up"), numDocs, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),NULL,maxIP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("max and down"), numDocs, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,NULL,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("not min, but up"), numDocs-1, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),NULL,maxIP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("not max, but down"), numDocs-1, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),medIP,maxIP,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("med and up, not max"), maxId-medId, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,medIP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("not min, up to med"), medId-minId, result->length());
        _CLLDELETE(result);
        _CLLDELETE(f);

        // very small sets

        f=_CLNEW RangeFilter(_T("id"),minIP,minIP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("min,min,F,F"), 0, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("id"),medIP,medIP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("med,med,F,F"), 0, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("id"),maxIP,maxIP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,max,F,F"), 0, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),minIP,minIP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("min,min,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("id"),NULL,minIP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("nul,min,F,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),maxIP,maxIP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,max,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("id"),maxIP,NULL,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,nul,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("id"),medIP,medIP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("med,med,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        search->close();
        _CLLDELETE(search);

        reader->close();
        _CLLDELETE(reader);

        _CLLDELETE(q);
    }
    void testRangeFilterRand()
    {
        IndexReader* reader = IndexReader::open(index);
        IndexSearcher* search = _CLNEW IndexSearcher(reader);

        std::tstring minRPstr = pad(minR);
        const TCHAR* minRP = minRPstr.c_str();
        
        std::tstring maxRPstr = pad(maxR);
        const TCHAR* maxRP = maxRPstr.c_str();

        size_t numDocs = static_cast<size_t>(reader->numDocs());

        assertEqualsMsg(_T("num of docs"), numDocs, 1+ maxId - minId);

        Hits* result;
        Term* term = _CLNEW Term(_T("body"),_T("body"));
        Query* q = _CLNEW TermQuery(term);
        _CLDECDELETE(term);

        // test extremes, bounded on both ends

        Filter* f = _CLNEW RangeFilter(_T("rand"),minRP,maxRP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("find all"), numDocs, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),minRP,maxRP,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but biggest"), numDocs-1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),minRP,maxRP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but smallest"), numDocs-1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),minRP,maxRP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("all but extremes"), numDocs-2, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        // unbounded

        f=_CLNEW RangeFilter(_T("rand"),minRP,NULL,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("smallest and up"), numDocs, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),NULL,maxRP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("biggest and down"), numDocs, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),minRP,NULL,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("not smallest, but up"), numDocs-1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),NULL,maxRP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("not biggest, but down"), numDocs-1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        // very small sets

        f=_CLNEW RangeFilter(_T("rand"),minRP,minRP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("min,min,F,F"), 0, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("rand"),maxRP,maxRP,F,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,max,F,F"), 0, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),minRP,minRP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("min,min,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("rand"),NULL,minRP,F,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("nul,min,F,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        f=_CLNEW RangeFilter(_T("rand"),maxRP,maxRP,T,T);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,max,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);
        f=_CLNEW RangeFilter(_T("rand"),maxRP,NULL,T,F);
        result = search->search(q,f);
        assertEqualsMsg(_T("max,nul,T,T"), 1, result->length());
        _CLLDELETE(result); _CLLDELETE(f);

        search->close();
        _CLLDELETE(search);

        reader->close();
        _CLLDELETE(reader);

        _CLLDELETE(q);
    }
/**
 * Runs a query against the "contents" field of a CLucene index and returns
 * one SearchData entry per hit.
 *
 * @param index     path of the index directory to open.
 * @param fobizzle  query string; only the first 79 characters are used.
 * @return a new[]-allocated array with one element per hit. Each element's
 *         `path` is a new[]-allocated, NUL-terminated string of the form
 *         "<path> <score>". The number of elements is stored in the global
 *         `arrsize`. The caller owns the array and the strings.
 *
 * Progress and timing information is printed to stdout as a side effect.
 */
SearchData *SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy() does not terminate the destination when the source fills it,
    // so copy one byte less and terminate explicitly.
    strncpy(line, fobizzle, sizeof(line) - 1);
    line[sizeof(line) - 1] = '\0';

    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);
    if ( q == NULL ){
        // Nothing to search for: report zero hits instead of dereferencing NULL.
        arrsize = 0;
        s.close();
        reader->close();
        _CLLDELETE(reader);
        return new SearchData[0];
    }

    buf = q->toString(_T("contents"));
    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    int32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();

    arrsize = h->length();
    const size_t hitCount = h->length();
    SearchData *search = new SearchData[hitCount];
    for ( size_t i=0; i < hitCount; i++ ){
        Document* doc = &h->doc(i);
        const TCHAR* path = doc->get(_T("path"));
        const double score = h->score(i);

        // %d expects an int, so narrow the size_t index explicitly.
        _tprintf(_T("%d. %S - %f\n"), static_cast<int>(i), path, score);

        // Measure first, then allocate: the formatted text can exceed the
        // 100 bytes that used to be assumed. The buffer is never smaller
        // than those 100 bytes.
        const int needed = snprintf(NULL, 0, "%S %f", path, score);
        size_t capacity = 100;
        if ( needed >= 0 && static_cast<size_t>(needed) + 1 > capacity )
            capacity = static_cast<size_t>(needed) + 1;

        char* entry = new char[capacity];
        if ( needed < 0 || snprintf(entry, capacity, "%S %f", path, score) < 0 )
            entry[0] = '\0'; // formatting failed; hand back an empty string
        search[i].path = entry;
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    // Every string taken from the hits was copied above, so the CLucene
    // objects can be released before returning.
    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);

    return search;
}
    void testRangeQueryRand()
    {
        // NOTE: uses index build in *super* setUp

        IndexReader * pReader = IndexReader::open( index );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

        std::tstring sMinRP = pad(minR);
        std::tstring sMaxRP = pad(maxR);
        const TCHAR* minRP = sMinRP.c_str();
        const TCHAR* maxRP = sMaxRP.c_str();
    
        size_t numDocs = static_cast<size_t>( pReader->numDocs() );
        assertEqualsMsg( _T("num of docs"), numDocs, static_cast<size_t>(1+ maxId - minId));
        
    	Hits * pResult;
        Query * q;

        // test extremes, bounded on both ends
        
        q = csrq( _T( "rand" ), minRP, maxRP, true, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "find all" ), numDocs, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), minRP, maxRP, true, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "all but biggest" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), minRP, maxRP, false, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "all but smallest" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), minRP, maxRP, false, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "all but extremes" ), numDocs-2, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
    
        // unbounded

        q = csrq( _T( "rand" ), minRP, NULL, true, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "smallest and up" ), numDocs, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), NULL, maxRP, false, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "biggest and down" ), numDocs, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), minRP, NULL, false, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "not smallest, but up" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
            
        q = csrq( _T( "rand" ), NULL, maxRP, false, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "not biggest, but down" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
        
        // very small sets

        q = csrq( _T( "rand" ), minRP, minRP, false, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "min,min,F,F" ), 0, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), maxRP, maxRP, false, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "max,max,F,F" ), 0, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
                         
        q = csrq( _T( "rand" ), minRP, minRP, true, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "min,min,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), NULL, minRP, false, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "nul,min,F,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), maxRP, maxRP, true, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "max,max,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "rand" ), maxRP, NULL, true, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "max,nul,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
    void testRangeQueryId()
    {
        // NOTE: uses index build in *super* setUp

        IndexReader * pReader = IndexReader::open( index );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

        int32_t medId = ((maxId - minId) / 2);
        
        std::tstring sMinIP = pad(minId);
        std::tstring sMaxIP = pad(maxId);
        std::tstring sMedIP = pad(medId);
        const TCHAR* minIP = sMinIP.c_str();
        const TCHAR* maxIP = sMaxIP.c_str();
        const TCHAR* medIP = sMedIP.c_str();
    
        size_t numDocs = static_cast<size_t>( pReader->numDocs() );
        assertEqualsMsg( _T("num of docs"), numDocs, static_cast<size_t>(1+ maxId - minId));
        
	    Hits * pResult;
        Query * q;
        // test id, bounded on both ends
        
        q = csrq( _T( "id" ), minIP, maxIP, true, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "find all" ), numDocs, pResult->length() );
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), minIP, maxIP, true, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "all but last" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), minIP, maxIP, false, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "all but first" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
            
	    q = csrq( _T( "id" ), minIP, maxIP, false,false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "all but ends" ), numDocs-2, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
    
        q = csrq( _T( "id" ), medIP, maxIP, true, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "med and up" ), 1+maxId-medId, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
        
        q = csrq( _T( "id" ), minIP, medIP, true, true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "up to med" ), 1+medId-minId, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        // unbounded id

	    q = csrq( _T( "id" ), minIP, NULL, true, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "min and up" ), numDocs, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), NULL, maxIP, false, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "max and down" ), numDocs, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), minIP, NULL, false, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "not min, but up" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
            
	    q = csrq( _T( "id" ), NULL, maxIP, false, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "not max, but down" ), numDocs-1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
            
        q = csrq( _T( "id" ), medIP, maxIP, true, false );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "med and up, not max" ), maxId-medId, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
        
        q = csrq( _T( "id" ), minIP, medIP, false,true );
	    pResult = pSearch->search( q );
        assertEqualsMsg( _T( "not min, up to med" ), medId-minId, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        // very small sets

	    q = csrq( _T( "id" ), minIP, minIP, false, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "min,min,F,F" ), 0, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "id" ), medIP, medIP, false, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "med,med,F,F" ), 0, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "id") , maxIP, maxIP, false, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "max,max,F,F" ), 0, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
                         
	    q = csrq( _T( "id" ), minIP, minIP, true, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "min,min,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "id" ), NULL, minIP, false, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "nul,min,F,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), maxIP, maxIP, true, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "max,max,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

        q = csrq( _T( "id" ), maxIP, NULL, true, false );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "max,nul,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );

	    q = csrq( _T( "id" ), medIP, medIP, true, true );
	    pResult = pSearch->search( q );
	    assertEqualsMsg( _T( "med,med,T,T" ), 1, pResult->length());
        _CLDELETE( pResult ); _CLDELETE( q );
            
        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #23
0
/**
 * Fuzzy-searches one field of the Lucene index.
 *
 * @param table     name of the indexed field to search; also the field the
 *                  matched text is read back from.
 * @param name      text to look for. An empty string yields an empty result.
 * @param fulltext  true:  every word of the name becomes its own fuzzy term
 *                         ("field:word1~ word2~");
 *                  false: the whole name is searched as one quoted phrase
 *                         ("field:"word1 word2"~").
 * @return map from the hit's "id" field to its score. A hit whose sortname
 *         equals the sortname of `name` scores exactly 1.0; every other hit
 *         is capped at 0.99. Hits scoring 0.20 or less are dropped.
 *
 * The reader and searcher are opened on first use. A CLuceneError is logged
 * and asserted on; whatever was collected up to that point is returned.
 */
QMap< int, float >
FuzzyIndex::search( const QString& table, const QString& name, bool fulltext )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;

    // Declared outside the try block so they are released on the error path too.
    Hits* hits = 0;
    Query* qry = 0;

    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        if ( name.isEmpty() )
            return resultsmap;

        // Converted once and reused for the parser and for every hit below.
        const std::wstring tableField = table.toStdWString();
        QueryParser parser( tableField.c_str(), m_analyzer );

        // escape() returns a heap buffer owned by the caller; copy it into a
        // QString and release it straight away.
        TCHAR* escapedRaw = parser.escape( name.toStdWString().c_str() );
        const QString escapedName = QString::fromWCharArray( escapedRaw );
        _CLDELETE_CARRAY( escapedRaw );

        const QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts );
        if ( sl.isEmpty() )
            return resultsmap; // no searchable words left; the query string would be malformed

        const QString queryString = fulltext
            ? QString( "%1:%2~" ).arg( table ).arg( sl.join( "~ " ) )
            : QString( "%1:\"%2\"~" ).arg( table ).arg( sl.join( " " ) );
        qry = parser.parse( queryString.toStdWString().c_str() );

        if ( qry )
        {
            // Loop-invariant: the normalised form of the name being searched for.
            const QString wantedSortname = DatabaseImpl::sortname( name );

            hits = m_luceneSearcher->search( qry );
            for ( uint i = 0; i < hits->length(); i++ )
            {
                Document* d = &hits->doc( i );

                float score = hits->score( i );
                int id = QString::fromWCharArray( d->get( _T( "id" ) ) ).toInt();
                QString result = QString::fromWCharArray( d->get( tableField.c_str() ) );

                if ( DatabaseImpl::sortname( result ) == wantedSortname )
                    score = 1.0;
                else
                    score = qMin( score, (float)0.99 );

                if ( score > 0.20 )
                {
                    resultsmap.insert( id, score );
//                    qDebug() << "Hitres:" << result << id << score << table << name;
                }
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}
/**
 * Runs a query and fills `result` with one row of field values per hit.
 *
 * @param q       query to run. When it has neither a term string nor
 *                subqueries the call is forwarded to getDocuments().
 * @param fields  names of the fields wanted for each hit. The prefixes
 *                "xesam:" and "nie:" are expanded to their full URIs before
 *                being matched against the stored field names.
 * @param types   wanted Variant type for each entry of `fields`; must have
 *                at least as many entries as `fields`.
 * @param result  cleared, then resized to the number of hits returned. Row i
 *                holds one Variant per requested field, in `fields` order.
 * @param off     index of the first hit to return; negative values count as 0.
 * @param max     maximum number of hits to return; a negative value means
 *                "no limit".
 *
 * If the search throws a CLuceneError the error is printed to stderr and
 * `result` is left empty.
 */
void
CLuceneIndexReader::getHits(const Strigi::Query& q,
        const std::vector<std::string>& fields,
        const std::vector<Strigi::Variant::Type>& types,
        std::vector<std::vector<Strigi::Variant> >& result, int off, int max) {
    result.clear();
    if (!checkReader() || types.size() < fields.size()) {
        return;
    }

    // Expand the short field prefixes into the full names used in the index.
    vector<string> fullFields;
    fullFields.resize(fields.size());
    for (size_t i = 0; i < fields.size(); i++) {
        if (fields[i].compare(0, 6, "xesam:") == 0) {
            fullFields[i].assign(
                "http://freedesktop.org/standards/xesam/1.0/core#"
                + fields[i].substr(6));
        } else if (fields[i].compare(0, 4, "nie:") == 0) {
            fullFields[i].assign(
                "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#"
                + fields[i].substr(4));
        } else {
            fullFields[i].assign(fields[i]);
        }
    }

    // An empty query is answered by getDocuments() instead of a search.
    if (q.term().string().size() == 0 && q.subQueries().size() == 0) {
        getDocuments(fullFields, types, result, off, max);
        return;
    }

    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }

    // Clamp the requested window to the hits that exist. Working with a
    // count rather than an end index keeps the arithmetic free of overflow
    // and guarantees that resize() never receives a negative value, which
    // would be converted to an enormous size_t.
    if (off < 0) off = 0;
    const int available = (s > off) ? s - off : 0;
    if (max < 0 || max > available) max = available;
    const int end = off + max;

    result.resize(max);
    for (int i = off; i < end; ++i) {
        Document *d = &hits->doc(i);
        vector<Variant>& doc = result[i-off];
        doc.clear();
        doc.resize(fields.size());

        DocumentFieldEnumeration* e = d->fields();
        while (e->hasMoreElements()) {
            Field* field = e->nextElement();
            string name(wchartoutf8(field->name()));
            // A stored field may satisfy more than one requested column.
            for (size_t j = 0; j < fullFields.size(); ++j) {
                if (fullFields[j] == name) {
                    doc[j] = p->getFieldValue(field, types[j]);
                }
            }
        }
        _CLDELETE(e);
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
}