Beispiel #1
0
/**
  Looks up a lexicon entry in the CLucene index of this module.

  The index is built first when hasIndex() reports that it is missing. The
  "content" field of every hit for the query "key:<key>" is concatenated,
  with "<hr /> " between consecutive hits.

  \param key The key of the entry.
  \return A newly allocated StringResponse: the joined hit contents, a
          translated "Nothing found" message when there is no hit, a
          translated error message when the index cannot be built, or an
          empty string when an exception was thrown during the lookup.
  */
Response * ZefaniaLex::getEntry(const QString &key)
{
    // Declared outside the try block so they can be released on the error path too.
    IndexReader* reader = nullptr;
    Query* q = nullptr;
    Hits* h = nullptr;
    Response* response = nullptr;
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return new StringResponse(QObject::tr("Cannot build index."));
            }
        }
        const QString index = indexPath();
        const QString queryText = "key:" + key;
        // The stop word list holds only its terminator, i.e. it is empty
        // (presumably so that no key is dropped as a stop word).
        const TCHAR* stop_words[] = { nullptr };
        standard::StandardAnalyzer analyzer(stop_words);
        reader = IndexReader::open(index.toStdString().c_str());
        IndexSearcher s(reader);

        q = QueryParser::parse(SearchTools::toTCHAR(queryText), _T("content"), &analyzer);
        h = s.search(q);
        QString ret = "";
        for(size_t i = 0; i < h->length(); i++) {
            Document* doc = &h->doc(i);
            if(!ret.isEmpty())
                ret.append("<hr /> ");
            ret.append(SearchTools::toQString(doc->get(_T("content"))));
        }

        // Release hits and query while the searcher they came from is still alive.
        delete h;
        h = nullptr;
        delete q;
        q = nullptr;
        s.close();

        if(ret.isEmpty())
            response = new StringResponse(QObject::tr("Nothing found for %1").arg(key));
        else
            response = new StringResponse(ret);
    } catch(...) {
        // Any failure is reported to the caller as an empty response (see below).
    }

    // No-ops after a successful lookup; on the error path these free whatever
    // had been allocated before the exception was thrown.
    delete h;
    delete q;
    if(reader != nullptr) {
        try {
            reader->close();
        } catch(...) {
            // Closing is best effort; the lookup result is already decided.
        }
        delete reader;
    }

    if(response == nullptr)
        response = new StringResponse(QString());
    return response;
}
Beispiel #2
0
/**
 * Runs a single query against a CLucene index and prints the hits to stdout.
 *
 * For every hit the "path" field and the score are printed, followed by the
 * time spent searching and printing.
 *
 * @param index    Location of the index, passed to IndexReader::open().
 * @param fobizzle Query string; only its first 79 characters are used.
 */
void SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy() leaves the buffer unterminated when the source holds 80 or
    // more characters, so copy one character less and terminate explicitly.
    strncpy(line, fobizzle, sizeof(line) - 1);
    line[sizeof(line) - 1] = 0;

    // reopen() hands back a different object when the reader was refreshed;
    // in that case the old reader is released and replaced.
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));
    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();

    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int; passing a size_t through varargs is undefined
        // where the two types differ in size.
        _tprintf(_T("%d. %S - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);
}
Beispiel #3
0
/**
 * Interactive search loop: reads query strings from stdin and prints the
 * matching documents ("path" field and score) until an empty line is entered
 * or the input ends.
 *
 * @param index Location of the index, passed to IndexReader::open().
 */
void SearchFiles(const char* index){
    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);
    while (true) {
        printf("Enter query string: ");
        // fgets() returns NULL on end of input or on a read error. Retrying
        // would spin forever once stdin is exhausted, so leave the loop.
        if ( fgets(line,80,stdin) == NULL )
            break;

        // Strip the trailing newline only when there is one: a line that was
        // cut at 79 characters, or the last line of input, may not end in
        // '\n', and an empty string must not be indexed at -1.
        size_t len = strlen(line);
        if ( len > 0 && line[len-1] == '\n' )
            line[--len] = 0;

        // reopen() hands back a different object when the reader was
        // refreshed; in that case the old reader is released and replaced.
        IndexReader* newreader = reader->reopen();
        if ( newreader != reader ){
            _CLLDELETE(reader);
            reader = newreader;
        }
        IndexSearcher s(reader);

        // An empty line ends the session.
        if ( len == 0 )
            break;
        STRCPY_AtoT(tline,line,80);
        Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

        buf = q->toString(_T("contents"));
        _tprintf(_T("Searching for: %s\n\n"), buf);
        _CLDELETE_LCARRAY(buf);

        uint64_t str = Misc::currentTimeMillis();
        Hits* h = s.search(q);
        uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
        str = Misc::currentTimeMillis();

        for ( size_t i=0;i<h->length();i++ ){
            Document* doc = &h->doc(i);
            // %d expects an int; passing a size_t through varargs is
            // undefined where the two types differ in size.
            _tprintf(_T("%d. %s - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
            //print result to web interface:
            LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i));
        }

        printf("\n\nSearch took: %d ms.\n", srch);
        printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

        _CLLDELETE(h);
        _CLLDELETE(q);

        s.close();
    }
    reader->close();
    _CLLDELETE(reader);
}
Beispiel #4
0
/**
 * Runs a fuzzy search on the "album" field of the index.
 *
 * The reader and searcher are opened on first use. The query text is the
 * escaped DatabaseImpl::sortname() of the full-text query.
 *
 * @param query Query to search for; must be a full-text query (asserted).
 * @return Map from the "albumid" field of each hit to its score, restricted
 *         to hits scoring above 0.30. Empty when the index does not exist or
 *         CLucene reports an error.
 */
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so that both are released even when
    // CLucene throws between allocating the query and finishing the loop.
    Hits* hits = 0;
    Query* qry = 0;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        QueryParser parser( _T( "album" ), m_analyzer );
        // NOTE(review): the buffer returned by parser.escape() is not released
        // here; it looks caller-owned in CLucene - confirm and free it with
        // the matching CLucene macro.
        QString escapedName = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );

        // NOTE(review): the Term is created here and never dereferenced by
        // this function - confirm whether FuzzyQuery takes its own reference.
        qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    // Deleting a null pointer is a no-op, so this is safe on every path.
    delete hits;
    delete qry;

    return resultsmap;
}
Beispiel #5
0
    /**
     * Search step of an asynchronous request.
     *
     * Opens the index named by baton->index, runs baton->search against it and
     * stores an array of {"id": <_id field>, "score": <score>} objects in
     * baton->results. On failure the error text is stored in baton->error
     * instead.
     *
     * NOTE(review): V8 handles are created in this function - confirm that it
     * runs on a thread where V8 may be used.
     *
     * @param req Request whose data member points to a search_baton_t.
     * @return Always 0.
     */
    static int EIO_Search(eio_req* req) 
    {
        search_baton_t* baton = static_cast<search_baton_t*>(req->data);

        standard::StandardAnalyzer analyzer;
        IndexReader* reader = 0;
        try {
            reader = IndexReader::open(*(*baton->index));
            // reopen() hands back a different object when the reader was
            // refreshed; in that case the old reader is released and replaced.
            // It is inside the try block so that a failure is reported through
            // the baton instead of escaping from this callback.
            IndexReader* newreader = reader->reopen();
            if ( newreader != reader ) {
                delete reader;
                reader = newreader;
            }
        } catch (CLuceneError& E) {
            baton->error.assign(E.what());
            delete reader;
            return 0;
        } catch(...) {
            baton->error = "Got an unknown exception";
            delete reader;
            return 0;
        }
        IndexSearcher s(reader);

        // Declared outside the try block so they are released on every path.
        TCHAR* searchString = 0;
        Query* q = 0;
        Hits* hits = 0;
        try {
            searchString = STRDUP_AtoT(*(*baton->search));
            q = QueryParser::parse(searchString, _T(""), &analyzer);
            hits = s.search(q);

            HandleScope scope;
            // Build the result array
            Local<v8::Array> resultArray = v8::Array::New();
            for (size_t i=0; i < hits->length(); i++) {
                Document& doc(hits->doc(i));
                // {"id":"ab34", "score":1.0}
                Local<Object> resultObject = Object::New();
                // TODO:  This dup might be a leak
                resultObject->Set(String::New("id"), String::New(STRDUP_TtoA(doc.get(_T("_id")))));
                resultObject->Set(String::New("score"), Number::New(hits->score(i)));
                resultArray->Set(i, resultObject);
            }
            baton->results = Persistent<v8::Array>::New(resultArray);
        } catch (CLuceneError& E) {
            baton->error.assign(E.what());
        } catch(...) {
            baton->error = "Got an unknown exception";
        }

        // The query is released only after the hits have been read, because
        // the hits were produced from it.
        delete hits;
        delete q;
        free(searchString);

        s.close();
        try {
            reader->close();
        } catch(...) {
            // Closing is best effort; the outcome is already stored in the baton.
        }
        delete reader;

        return 0;
    }
/**
 * Builds a histogram over the integer values of one field for all documents
 * matching a query.
 *
 * @param query     Query text, parsed with Strigi::QueryParser.
 * @param fieldname Name of the field whose values are collected.
 * @param labeltype When equal to "time" (or when fieldname is the mtime
 *                  field) a time histogram is produced.
 * @return The histogram; empty when no reader is available or when a field
 *         value is not a plain decimal integer.
 */
vector<pair<string,uint32_t> >
CLuceneIndexReader::histogram(const string& query,
        const string& fieldname, const string& labeltype) {
    vector<pair<string,uint32_t> > h;
    if (!checkReader()) {
        return h;
    }
    Strigi::QueryParser parser;
    Strigi::Query q = parser.buildQuery(query);
    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    wstring field = utf8toucs2(fieldname);
    int32_t max = INT_MIN;
    int32_t min = INT_MAX;
    vector<int32_t> values;
    values.reserve(s);
    bool numeric = true;
    for (int i = 0; numeric && i < s; ++i) {
        Document *d = &hits->doc(i);
        const TCHAR* v = d->get(field.c_str());
        if (v) {
            // Keep the converted text alive while 'end' points into it; a
            // temporary would be destroyed before *end is inspected.
            const string text(wchartoutf8( v ));
            char* end;
            int val = (int)strtol(text.c_str(), &end, 10);
            if ( *end != 0) {
                // Not a plain integer: give up, but fall through to the
                // common cleanup below instead of returning right here.
                numeric = false;
            } else {
                values.push_back(val);
                max = (max>val) ?max :val;
                min = (min<val) ?min :val;
            }
        }
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
    if (!numeric) {
        return h;
    }
    if (fieldname == FieldRegister::mtimeFieldName || labeltype == "time") {
        return makeTimeHistogram(values);
    } else {
        return makeHistogram(values, min, max);
    }
}
/**
 * Runs a query and returns a window of the matching documents.
 *
 * @param q   Query to run. A query with a single empty field whose term
 *            starts with "strigispecial:" is forwarded to strigiSpecial().
 * @param off Index of the first hit to return; negative values count as 0.
 * @param max Number of hits to return, counted from off; the window is
 *            clamped to the number of hits.
 * @return One IndexedDocument per hit in the window, carrying the hit's score
 *         and whatever Private::addField() adds for each of its fields. Empty
 *         when no reader is available or the search fails.
 */
vector<IndexedDocument>
CLuceneIndexReader::query(const Strigi::Query& q, int off, int max) {
    vector<IndexedDocument> results;
    if (!checkReader()) {
        return results;
    }

    // handle special commands
    const bool special = q.fields().size() == 1 && q.fields()[0].empty()
        && q.term().string().substr(0, 14) == "strigispecial:";
    if (special) {
        return p->strigiSpecial(q.term().string());
    }

    Query* luceneQuery = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int total = 0;
    try {
        hits = searcher.search(luceneQuery);
        total = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }

    // Turn (off, max) into the half-open hit range [off, max), clamped to
    // the number of hits.
    if (off < 0) off = 0;
    max += off;
    if (max < 0 || max > total) max = total;
    if (max > off) {
        results.reserve(max-off);
    }

    int i = off;
    while (i < max) {
        Document* hit = &hits->doc(i);
        IndexedDocument doc;
        doc.score = hits->score(i);
        DocumentFieldEnumeration* fieldIt = hit->fields();
        while (fieldIt->hasMoreElements()) {
            Private::addField(fieldIt->nextElement(), doc);
        }
        results.push_back(doc);
        _CLDELETE(fieldIt);
        ++i;
    }

    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(luceneQuery);
    return results;
}
void
CLuceneIndexReader::getChildren(const std::string& parent,
            std::map<std::string, time_t>& children) {
    children.clear();
    // force a fresh reader. This is important because the function
    // getChildren is essential for updating the index
    if ( !checkReader(true) ) {
        return;
    }
    // build a query
    Term* t = Private::createKeywordTerm(Private::parentlocation(),
        parent);
    Query* q = _CLNEW TermQuery(t);
    _CLDECDELETE(t);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int nhits = 0;
    try {
        hits = searcher.search(q);
        nhits = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    const TCHAR* mtime = mapId(Private::mtime());
    for (int i = 0; i < nhits; ++i) {
        Document* d = &hits->doc(i);

        const TCHAR* v = d->get(mtime);
        // check that mtime is defined for this document
        if (v) {
            time_t mtime = atoi(wchartoutf8( v ).c_str());
            v = d->get(Private::systemlocation());
            if (v) {
                children[wchartoutf8( v )] = mtime;
            }
        }

    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(q);
}
Beispiel #9
0
/**
  Looks up a lexicon entry in the CLucene index of this module.

  The index is built first when hasIndex() reports that it is missing. The
  "content" field of every hit for the query "key:<key>" is concatenated,
  with "<hr /> " between consecutive hits.

  \param key The key of the entry.
  \return The joined hit contents, a translated "Nothing found" message when
          there is no hit, a translated error message when the index cannot
          be built, or an empty string when an exception was thrown during
          the lookup.
  */
QString ZefaniaLex::getEntry(const QString &key)
{
    // Declared outside the try block so they can be released on the error path too.
    IndexReader* reader = NULL;
    Query* q = NULL;
    Hits* h = NULL;
    QString result;
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QObject::tr("Cannot build index.");
            }
        }
        const QString index = indexPath();
        const QString queryText = "key:" + key;
        // The stop word list holds only its terminator, i.e. it is empty
        // (presumably so that no key is dropped as a stop word).
        const TCHAR* stop_words[] = { NULL };
        standard::StandardAnalyzer analyzer(stop_words);
        reader = IndexReader::open(index.toStdString().c_str());
        IndexSearcher s(reader);
    #ifdef OBV_USE_WSTRING
        q = QueryParser::parse(queryText.toStdWString().c_str(), _T("content"), &analyzer);
    #else
        q = QueryParser::parse(reinterpret_cast<const wchar_t *>(queryText.utf16()), _T("content"), &analyzer);
    #endif
        h = s.search(q);
        QString ret = "";
        for(size_t i = 0; i < h->length(); i++) {
            Document* doc = &h->doc(i);
            if(!ret.isEmpty())
                ret.append("<hr /> ");
    #ifdef OBV_USE_WSTRING
            ret.append(QString::fromWCharArray(doc->get(_T("content"))));
    #else
            ret.append(QString::fromUtf16((const ushort*)doc->get(_T("content"))));
    #endif
        }

        // Release hits and query while the searcher they came from is still alive.
        delete h;
        h = NULL;
        delete q;
        q = NULL;
        s.close();

        result = ret.isEmpty() ? QObject::tr("Nothing found for %1").arg(key) : ret;
    } catch(...) {
        // Any failure is reported to the caller as an empty string.
        result = QString();
    }

    // No-ops after a successful lookup; on the error path these free whatever
    // had been allocated before the exception was thrown.
    delete h;
    delete q;
    if(reader != NULL) {
        try {
            reader->close();
        } catch(...) {
            // Closing is best effort; the lookup result is already decided.
        }
        delete reader;
    }
    return result;
}
Beispiel #10
0
/**
 * Runs a fuzzy track search against the index.
 *
 * The reader and searcher are opened on first use. A full-text query is
 * matched against the "track", "artist" and "fulltext" fields, any of which
 * may match; otherwise both the "track" and the "artist" field must match the
 * query's track and artist.
 *
 * @param query Query to search for.
 * @return Map from the "trackid" field of each hit to its score, restricted
 *         to hits scoring above 0. Empty when the index does not exist or
 *         CLucene reports an error.
 */
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so that both are released even when
    // CLucene throws between allocating the query and finishing the loop.
    Hits* hits = 0;
    BooleanQuery* qry = 0;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        float minScore;
        const TCHAR** fields = 0;
        // The parser is only used for its escape() function here.
        // NOTE(review): the buffers returned by parser.escape() are not
        // released; they look caller-owned in CLucene - confirm and free them
        // with the matching CLucene macro.
        MultiFieldQueryParser parser( fields, m_analyzer );
        qry = _CLNEW BooleanQuery();

        // NOTE(review): the Terms created below are never dereferenced by this
        // function - confirm whether FuzzyQuery takes its own reference.
        if ( query->isFullTextQuery() )
        {
            QString escapedQuery = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );

            Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            minScore = 0.00;
        }
        else
        {
            QString track = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() ) );
            QString artist = QString::fromWCharArray( parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() ) );
//            QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );

            Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::MUST );

            term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            qry->add( fqry, true, BooleanClause::MUST );

            minScore = 0.00;
        }

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    // Deleting a null pointer is a no-op, so this is safe on every path.
    delete hits;
    delete qry;

    return resultsmap;
}
/**
 * Runs a single query against a CLucene index, prints the hits to stdout and
 * returns them.
 *
 * The number of hits is also stored in the variable arrsize, which is
 * declared outside this function.
 *
 * @param index    Location of the index, passed to IndexReader::open().
 * @param fobizzle Query string; only its first 79 characters are used.
 * @return Array allocated with new[] holding one SearchData per hit. The path
 *         member of each element points to a separately allocated 100-byte
 *         buffer holding "<path> <score>", cut off if it does not fit. This
 *         function releases neither the array nor the buffers.
 */
SearchData *SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy() leaves the buffer unterminated when the source holds 80 or
    // more characters, so copy one character less and terminate explicitly.
    strncpy(line, fobizzle, sizeof(line) - 1);
    line[sizeof(line) - 1] = 0;

    // reopen() hands back a different object when the reader was refreshed;
    // in that case the old reader is released and replaced.
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));
    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();
    arrsize = h->length();
    SearchData *search = new SearchData[h->length()];
    const size_t pathSize = 100;
    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int; passing a size_t through varargs is undefined
        // where the two types differ in size.
        _tprintf(_T("%d. %S - %f\n"), (int)i, doc->get(_T("path")), h->score(i));

        char *wtfbbq = new char[pathSize];
        wtfbbq[0] = 0;
        // Bounded formatting: a long path must not overrun the buffer.
        snprintf(wtfbbq, pathSize, "%S %f", doc->get(_T("path")), h->score(i));
        search[(int)i].path = wtfbbq;
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    // Everything returned to the caller was copied into separately allocated
    // buffers above, so the CLucene objects can be released here.
    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);
    return search;
}
/**
 * Runs a query and returns selected field values of a window of the hits.
 *
 * Field names starting with "xesam:" or "nie:" are expanded to their full
 * ontology URIs before they are compared with the stored field names. An
 * empty query is delegated to getDocuments().
 *
 * @param q      Query to run.
 * @param fields Names of the fields to return.
 * @param types  Requested type for each entry of fields; must hold at least
 *               as many entries as fields.
 * @param result Output, cleared first. Receives one row per hit in the
 *               window, each row holding one Variant per entry of fields.
 * @param off    Index of the first hit to return; negative values count as 0.
 * @param max    Number of hits to return, counted from off; the window is
 *               clamped to the number of hits.
 */
void
CLuceneIndexReader::getHits(const Strigi::Query& q,
        const std::vector<std::string>& fields,
        const std::vector<Strigi::Variant::Type>& types,
        std::vector<std::vector<Strigi::Variant> >& result, int off, int max) {
    result.clear();
    if (!checkReader() || types.size() < fields.size()) {
        return;
    }

    vector<string> fullFields;
    fullFields.resize(fields.size());
    for (size_t i = 0; i < fields.size(); i++) {
        if (fields[i].compare(0, 6, "xesam:") == 0) {
            fullFields[i].assign(
                "http://freedesktop.org/standards/xesam/1.0/core#"
                + fields[i].substr(6));
        } else if (fields[i].compare(0, 4, "nie:") == 0) {
            fullFields[i].assign(
                "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#"
                + fields[i].substr(4));
        } else {
            fullFields[i].assign(fields[i]);
        }
    }

    // if the query is empty, we return the number of files in the index
    if (q.term().string().size() == 0 && q.subQueries().size() == 0) {
        getDocuments(fullFields, types, result, off, max);
        return;
    }

    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    if (off < 0) off = 0;
    max += off;
    if (max < 0) max = s;
    if (max > s) max = s;
    // Only size the result when the window is not empty. When off lies past
    // the last hit, max-off is negative and would be converted to a huge
    // unsigned size by resize().
    if (max > off) {
        result.resize(max-off);
    }
    for (int i = off; i < max; ++i) {
        Document *d = &hits->doc(i);
        vector<Variant>& doc = result[i-off];
        doc.clear();
        doc.resize(fields.size());

        DocumentFieldEnumeration* e = d->fields();
        while (e->hasMoreElements()) {
            Field* field = e->nextElement();
            string name(wchartoutf8(field->name()));
            for (uint j = 0; j < fullFields.size(); ++j) {
                if (fullFields[j] == name) {
                    doc[j] = p->getFieldValue(field, types[j]);
                }
            }
        }
        _CLDELETE(e);
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
}
Beispiel #13
0
/**
 * Runs a fuzzy search for a name in one field of the index.
 *
 * The reader and searcher are opened on first use. The name is escaped,
 * normalised with DatabaseImpl::sortname() and split at spaces. For a
 * full-text search every word gets its own "~" suffix; otherwise the words
 * form one quoted phrase followed by "~".
 *
 * @param table    Name of the field to search; also the field whose stored
 *                 value is compared with name.
 * @param name     Text to search for; an empty name yields an empty result.
 * @param fulltext Selects the word-by-word query form described above.
 * @return Map from the "id" field of each hit to its score, restricted to
 *         scores above 0.20. A hit whose normalised value equals the
 *         normalised name scores 1.0; every other score is capped at 0.99.
 *         Empty when the index does not exist or CLucene reports an error.
 */
QMap< int, float >
FuzzyIndex::search( const QString& table, const QString& name, bool fulltext )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so that both are released even when
    // CLucene throws between parsing the query and finishing the loop.
    Hits* hits = 0;
    Query* qry = 0;
    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        if ( name.isEmpty() )
            return resultsmap;

        // NOTE(review): the buffers returned by parser.escape() are not
        // released; they look caller-owned in CLucene - confirm and free them
        // with the matching CLucene macro.
        QueryParser parser( table.toStdWString().c_str(), m_analyzer );

        if ( fulltext )
        {
            QString escapedName = QString::fromWCharArray( parser.escape( name.toStdWString().c_str() ) );

            QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts );
            qry = parser.parse( QString( "%1:%2~" ).arg( table ).arg( sl.join( "~ " ) ).toStdWString().c_str() );
        }
        else
        {
//            qry = _CLNEW FuzzyQuery( _CLNEW Term( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str() ) );
            QString escapedName = QString::fromWCharArray( parser.escape( name.toStdWString().c_str() ) );

            QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts );
            qry = parser.parse( QString( "%1:\"%2\"~" ).arg( table ).arg( sl.join( " " ) ).toStdWString().c_str() );
        }

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "id" ) ) ).toInt();
            QString result = QString::fromWCharArray( d->get( table.toStdWString().c_str() ) );

            // An exact match after normalisation always wins; everything else
            // is kept strictly below it.
            if ( DatabaseImpl::sortname( result ) == DatabaseImpl::sortname( name ) )
                score = 1.0;
            else
                score = qMin( score, (float)0.99 );

            if ( score > 0.20 )
            {
                resultsmap.insert( id, score );
//                qDebug() << "Hitres:" << result << id << score << table << name;
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    // Deleting a null pointer is a no-op, so this is safe on every path.
    delete hits;
    delete qry;

    return resultsmap;
}