/**
 * Returns an entry.
 *
 * Builds the index first if hasIndex() reports that none exists, then
 * searches it for "key:<key>" (default field "content") and joins the
 * "content" field of every hit, separated by "<hr /> ".
 *
 * \param key The key of the entry.
 * \return A newly allocated StringResponse holding the joined content, a
 *         translated "Nothing found for ..." or "Cannot build index."
 *         message, or an empty string if anything threw during the lookup.
 */
Response * ZefaniaLex::getEntry(const QString &key)
{
    // Declared outside the try block so that they can be released on every
    // path, including the exceptional one.
    IndexReader *reader = nullptr;
    Query *q = nullptr;
    Hits *h = nullptr;
    Response *result = nullptr;

    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return new StringResponse(QObject::tr("Cannot build index."));
            }
        }
        const QString index = indexPath();
        const QString queryText = "key:" + key;
        const TCHAR* stop_words[] = { nullptr };
        standard::StandardAnalyzer analyzer(stop_words);

        reader = IndexReader::open(index.toStdString().c_str());
        IndexSearcher s(reader);
        q = QueryParser::parse(SearchTools::toTCHAR(queryText), _T("content"), &analyzer);
        h = s.search(q);

        QString ret = "";
        for(size_t i = 0; i < h->length(); i++) {
            Document* doc = &h->doc(i);
            if(!ret.isEmpty())
                ret.append("<hr /> ");
            ret.append(SearchTools::toQString(doc->get(_T("content"))));
        }

        // Release the hits and the query while the searcher is still alive.
        delete h;
        h = nullptr;
        delete q;
        q = nullptr;

        result = ret.isEmpty()
                 ? new StringResponse(QObject::tr("Nothing found for %1").arg(key))
                 : new StringResponse(ret);
    } catch(...) {
        result = new StringResponse(QString());
    }

    // No-ops on the normal path; they release whatever an exception left behind.
    delete h;
    delete q;
    if(reader != nullptr) {
        try {
            reader->close();
        } catch(...) {
            // Closing is best effort; the reader is deleted regardless.
        }
        delete reader;
    }
    return result;
}
void SearchFilesC(const char* index, const char* fobizzle){ standard::StandardAnalyzer analyzer; char line[80]; TCHAR tline[80]; TCHAR* buf; IndexReader* reader = IndexReader::open(index); //printf("Enter query string: "); strncpy(line,fobizzle,80); //line[strlen(line)-1]=0; IndexReader* newreader = reader->reopen(); if ( newreader != reader ){ _CLLDELETE(reader); reader = newreader; } IndexSearcher s(reader); STRCPY_AtoT(tline,line,80); Query* q = QueryParser::parse(tline,_T("contents"),&analyzer); buf = q->toString(_T("contents")); _tprintf(_T("Searching for: %S\n\n"), buf); _CLDELETE_LCARRAY(buf); uint64_t str = Misc::currentTimeMillis(); Hits* h = s.search(q); uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str); str = Misc::currentTimeMillis(); //SearchData search[h->length()]; for ( size_t i=0; i < h->length(); i++ ){ Document* doc = &h->doc(i); //const TCHAR* buf = doc.get(_T("contents")); _tprintf(_T("%d. %S - %f\n"), i, doc->get(_T("path")), h->score(i)); //search[i].set_path(doc->get(_T("path"))); } printf("\n\nSearch took: %d ms.\n", srch); printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str)); _CLLDELETE(h); _CLLDELETE(q); s.close(); reader->close(); _CLLDELETE(reader); };
void SearchFiles(const char* index){ standard::StandardAnalyzer analyzer; char line[80]; TCHAR tline[80]; TCHAR* buf; IndexReader* reader = IndexReader::open(index); while (true) { printf("Enter query string: "); char* tmp = fgets(line,80,stdin); if ( tmp == NULL ) continue; line[strlen(line)-1]=0; IndexReader* newreader = reader->reopen(); if ( newreader != reader ){ _CLLDELETE(reader); reader = newreader; } IndexSearcher s(reader); if ( strlen(line) == 0 ) break; STRCPY_AtoT(tline,line,80); Query* q = QueryParser::parse(tline,_T("contents"),&analyzer); buf = q->toString(_T("contents")); _tprintf(_T("Searching for: %s\n\n"), buf); _CLDELETE_LCARRAY(buf); uint64_t str = Misc::currentTimeMillis(); Hits* h = s.search(q); uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str); str = Misc::currentTimeMillis(); for ( size_t i=0;i<h->length();i++ ){ Document* doc = &h->doc(i); //const TCHAR* buf = doc.get(_T("contents")); _tprintf(_T("%d. %s - %f\n"), i, doc->get(_T("path")), h->score(i)); //print result to web interface: LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i)); } printf("\n\nSearch took: %d ms.\n", srch); printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str)); _CLLDELETE(h); _CLLDELETE(q); s.close(); } reader->close(); _CLLDELETE(reader); }
/**
 * Fuzzy-searches the "album" field of the Lucene index for the sortname of
 * the query's full-text string.
 *
 * The reader and searcher members are opened on first use; if the index does
 * not exist an empty map is returned. A CLuceneError is logged and also
 * yields whatever was collected so far.
 *
 * @param query Full-text query (asserted via isFullTextQuery()).
 * @return Map from "albumid" to score for every hit scoring above 0.30.
 */
QMap< int, float >
FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
    Q_ASSERT( query->isFullTextQuery() );

    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so they are released even if CLucene throws.
    Query* qry = 0;
    Hits* hits = 0;

    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        QueryParser parser( _T( "album" ), m_analyzer );

        // Per the CLucene API, escape() returns a buffer the caller must free.
        wchar_t* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
        const QString escapedName = QString::fromWCharArray( escaped );
        _CLDELETE_LCARRAY( escaped );

        // The query takes its own reference on the term; drop ours so the
        // term is destroyed together with the query.
        Term* term = _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() );
        qry = _CLNEW FuzzyQuery( term );
        _CLDECDELETE( term );

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );
            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();

            if ( score > 0.30 )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score;
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}
/**
 * EIO work callback: opens the index named by the baton, runs the baton's
 * search string through QueryParser with an empty default field and stores
 * an array of {"id": <_id field>, "score": <score>} objects in
 * baton->results.
 *
 * Failures are reported through baton->error (the CLucene message, or
 * "Got an unknown exception"); the function always returns 0.
 *
 * @param req EIO request whose `data` member is a search_baton_t*.
 */
static int EIO_Search(eio_req* req)
{
    search_baton_t* baton = static_cast<search_baton_t*>(req->data);

    standard::StandardAnalyzer analyzer;

    IndexReader* reader = 0;
    try {
        reader = IndexReader::open(*(*baton->index));

        // Pick up a newer version of the index if there is one. Done inside
        // the try block because reopening can fail just like opening.
        IndexReader* newreader = reader->reopen();
        if ( newreader != reader ) {
            delete reader;
            reader = newreader;
        }
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
        delete reader;
        return 0;
    } catch(...) {
        baton->error = "Got an unknown exception";
        delete reader;
        return 0;
    }

    IndexSearcher s(reader);

    // Declared outside the try block so they are released on every path.
    TCHAR* searchString = 0;
    Query* q = 0;
    Hits* hits = 0;

    try {
        searchString = STRDUP_AtoT(*(*baton->search));
        q = QueryParser::parse(searchString, _T(""), &analyzer);
        hits = s.search(q);

        HandleScope scope;

        // Build the result array
        Local<v8::Array> resultArray = v8::Array::New();
        for (size_t i=0; i < hits->length(); i++) {
            Document& doc(hits->doc(i));
            // {"id":"ab34", "score":1.0}
            Local<Object> resultObject = Object::New();

            // String::New() copies the characters, so the converted id is
            // released right away, in the same way as searchString.
            char* id = STRDUP_TtoA(doc.get(_T("_id")));
            resultObject->Set(String::New("id"), String::New(id));
            free(id);

            resultObject->Set(String::New("score"), Number::New(hits->score(i)));
            resultArray->Set(i, resultObject);
        }
        baton->results = Persistent<v8::Array>::New(resultArray);
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
    } catch(...) {
        baton->error = "Got an unknown exception";
    }

    free(searchString);
    // The query is deleted only after the hits that were produced from it.
    delete hits;
    delete q;

    s.close();
    try {
        reader->close();
    } catch(...) {
        // Closing is best effort; the reader is deleted regardless.
    }
    delete reader;

    return 0;
}
/**
 * Builds a histogram over the integer values of one field for all documents
 * matching a query.
 *
 * @param query     Query text, parsed with Strigi::QueryParser.
 * @param fieldname Field whose values are collected.
 * @param labeltype "time" selects the time histogram (as does a fieldname
 *                  equal to FieldRegister::mtimeFieldName).
 * @return The histogram; empty if the reader is unavailable or if any
 *         matching document holds a value that is not entirely a base-10
 *         integer.
 */
vector<pair<string,uint32_t> >
CLuceneIndexReader::histogram(const string& query,
        const string& fieldname, const string& labeltype) {
    vector<pair<string,uint32_t> > h;
    if (!checkReader()) {
        return h;
    }
    Strigi::QueryParser parser;
    Strigi::Query q = parser.buildQuery(query);
    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    wstring field = utf8toucs2(fieldname);
    int32_t max = INT_MIN;
    int32_t min = INT_MAX;
    vector<int32_t> values;
    values.reserve(s);
    bool allNumeric = true;
    for (int i = 0; allNumeric && i < s; ++i) {
        Document *d = &hits->doc(i);
        const TCHAR* v = d->get(field.c_str());
        if (v) {
            // Keep the converted text alive: strtol() stores a pointer into
            // it in `end`, which is dereferenced after the call.
            const string text(wchartoutf8( v ).c_str());
            char* end;
            int val = (int)strtol(text.c_str(), &end, 10);
            if ( *end != 0) {
                allNumeric = false;
            } else {
                values.push_back(val);
                max = (max>val) ?max :val;
                min = (min<val) ?min :val;
            }
        }
    }
    // Single cleanup path, so the query is released on the failure path too.
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
    if (!allNumeric) {
        return h;
    }
    if (fieldname == FieldRegister::mtimeFieldName || labeltype == "time") {
        return makeTimeHistogram(values);
    } else {
        return makeHistogram(values, min, max);
    }
}
/**
 * Runs a Strigi query against the index and returns the matching documents.
 *
 * Queries with a single empty field whose term starts with "strigispecial:"
 * are forwarded to strigiSpecial(). A CLuceneError during the search is
 * reported on stderr and produces an empty result.
 *
 * @param q   Query to execute.
 * @param off Index of the first hit to return; negative values count as 0.
 * @param max Number of hits wanted; after adding `off`, a negative value or
 *            one beyond the hit count is replaced by the hit count.
 * @return One IndexedDocument per returned hit, carrying the hit's score and
 *         the fields added by Private::addField().
 */
vector<IndexedDocument>
CLuceneIndexReader::query(const Strigi::Query& q, int off, int max) {
    vector<IndexedDocument> found;
    if (!checkReader()) {
        return found;
    }

    // handle special commands
    const bool isSpecial = q.fields().size() == 1 && q.fields()[0].empty()
        && q.term().string().substr(0, 14) == "strigispecial:";
    if (isSpecial) {
        return p->strigiSpecial(q.term().string());
    }

    Query* luceneQuery = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int total = 0;
    try {
        hits = searcher.search(luceneQuery);
        total = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }

    // Turn (offset, count) into the half-open hit range [off, max).
    if (off < 0) {
        off = 0;
    }
    max += off;
    if (max < 0) {
        max = total;
    }
    if (max > total) {
        max = total;
    }
    if (max > off) {
        found.reserve(max-off);
    }

    int position = off;
    while (position < max) {
        Document* hit = &hits->doc(position);
        IndexedDocument entry;
        entry.score = hits->score(position);

        DocumentFieldEnumeration* fieldIter = hit->fields();
        while (fieldIter->hasMoreElements()) {
            Private::addField(fieldIter->nextElement(), entry);
        }
        _CLDELETE(fieldIter);

        found.push_back(entry);
        ++position;
    }

    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(luceneQuery);
    return found;
}
void CLuceneIndexReader::getChildren(const std::string& parent, std::map<std::string, time_t>& children) { children.clear(); // force a fresh reader. This is important because the function // getChildren is essential for updating the index if ( !checkReader(true) ) { return; } // build a query Term* t = Private::createKeywordTerm(Private::parentlocation(), parent); Query* q = _CLNEW TermQuery(t); _CLDECDELETE(t); IndexSearcher searcher(reader); Hits* hits = 0; int nhits = 0; try { hits = searcher.search(q); nhits = hits->length(); } catch (CLuceneError& err) { fprintf(stderr, "could not query: %s\n", err.what()); } const TCHAR* mtime = mapId(Private::mtime()); for (int i = 0; i < nhits; ++i) { Document* d = &hits->doc(i); const TCHAR* v = d->get(mtime); // check that mtime is defined for this document if (v) { time_t mtime = atoi(wchartoutf8( v ).c_str()); v = d->get(Private::systemlocation()); if (v) { children[wchartoutf8( v )] = mtime; } } } if (hits) { _CLDELETE(hits); } searcher.close(); _CLDELETE(q); }
/** Returns a Entry. \id The key of the entry. */ QString ZefaniaLex::getEntry(const QString &key) { try { if(!hasIndex()) { if(buildIndex() != 0) { return QObject::tr("Cannot build index."); } } const QString index = indexPath(); const QString queryText = "key:" + key; const TCHAR* stop_words[] = { NULL }; standard::StandardAnalyzer analyzer(stop_words); IndexReader* reader = IndexReader::open(index.toStdString().c_str()); IndexSearcher s(reader); #ifdef OBV_USE_WSTRING Query* q = QueryParser::parse(queryText.toStdWString().c_str(), _T("content"), &analyzer); #else Query* q = QueryParser::parse(reinterpret_cast<const wchar_t *>(queryText.utf16()), _T("content"), &analyzer); #endif Hits* h = s.search(q); QString ret = ""; for(size_t i = 0; i < h->length(); i++) { Document* doc = &h->doc(i); if(!ret.isEmpty()) ret.append("<hr /> "); #ifdef OBV_USE_WSTRING ret.append(QString::fromWCharArray(doc->get(_T("content")))); #else ret.append(QString::fromUtf16((const ushort*)doc->get(_T("content")))); #endif } return ret.isEmpty() ? QObject::tr("Nothing found for %1").arg(key) : ret; } catch(...) { return QString(); } }
/**
 * Fuzzy-searches the Lucene index for tracks matching a query.
 *
 * A full-text query is matched (SHOULD) against the "track", "artist" and
 * "fulltext" fields; any other query must match (MUST) both its track and
 * its artist. All search terms are sortname-normalised and escaped first.
 *
 * The reader and searcher members are opened on first use; if the index does
 * not exist an empty map is returned. A CLuceneError is logged and also
 * yields whatever was collected so far.
 *
 * @param query Query to look up.
 * @return Map from "trackid" to score for every hit scoring above 0.
 */
QMap< int, float >
FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so they are released even if CLucene throws.
    BooleanQuery* qry = 0;
    Hits* hits = 0;

    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        float minScore;
        const TCHAR** fields = 0;
        MultiFieldQueryParser parser( fields, m_analyzer );
        qry = _CLNEW BooleanQuery();

        // Notes for the code below:
        //  - per the CLucene API, escape() returns a buffer the caller must
        //    free;
        //  - each FuzzyQuery takes its own reference on its term, so ours is
        //    dropped right after the query has been constructed.
        if ( query->isFullTextQuery() )
        {
            TCHAR* escaped = parser.escape( DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() );
            const QString escapedQuery = QString::fromWCharArray( escaped );
            _CLDELETE_LCARRAY( escaped );

            Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::SHOULD );

            minScore = 0.00;
        }
        else
        {
            TCHAR* escapedTrack = parser.escape( DatabaseImpl::sortname( query->track() ).toStdWString().c_str() );
            const QString track = QString::fromWCharArray( escapedTrack );
            _CLDELETE_LCARRAY( escapedTrack );

            TCHAR* escapedArtist = parser.escape( DatabaseImpl::sortname( query->artist() ).toStdWString().c_str() );
            const QString artist = QString::fromWCharArray( escapedArtist );
            _CLDELETE_LCARRAY( escapedArtist );

            Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
            Query* fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::MUST );

            term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
            fqry = _CLNEW FuzzyQuery( term );
            _CLDECDELETE( term );
            qry->add( fqry, true, BooleanClause::MUST );

            minScore = 0.00;
        }

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );
            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();

            if ( score > minScore )
            {
                resultsmap.insert( id, score );
//                tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}
/**
 * Runs one query against the CLucene index at `index`, prints every hit and
 * returns the hits as a newly allocated array.
 *
 * The number of elements is stored in `arrsize`. Each element's `path` is
 * set to a separately allocated string "<path field> <score>".
 *
 * @param index    Index location handed to IndexReader::open().
 * @param fobizzle Query string; only the first 79 characters are used.
 * @return Array allocated with new[], holding one SearchData per hit.
 */
SearchData *SearchFilesC(const char* index, const char* fobizzle){
    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy() does not terminate the destination when the source fills it,
    // so copy one character less and terminate explicitly.
    strncpy(line, fobizzle, sizeof(line) - 1);
    line[sizeof(line) - 1] = 0;

    // Pick up a newer version of the index if there is one.
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));
    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();
    arrsize = h->length();
    SearchData *search = new SearchData[h->length()];

    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        const TCHAR* path = doc->get(_T("path"));
        const double score = h->score(i);

        // "%d" expects an int; passing a size_t through varargs is undefined.
        _tprintf(_T("%d. %S - %f\n"), static_cast<int>(i), path, score);

        // Measure first so that a long path cannot overflow the buffer. The
        // buffer never shrinks below the 100 bytes used before.
        int needed = snprintf(NULL, 0, "%S %f", path, score);
        if ( needed < 0 )
            needed = 0;
        const size_t capacity = (needed + 1 > 100) ? (size_t)(needed + 1) : 100;
        char *entry = new char[capacity];
        if ( snprintf(entry, capacity, "%S %f", path, score) < 0 )
            entry[0] = 0;
        search[(int)i].path = entry;
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    // Every returned string is an independent copy, so the CLucene objects
    // can be released here.
    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();
    reader->close();
    _CLLDELETE(reader);

    return search;
}
/**
 * Runs a query and returns, for each hit, the values of the requested fields.
 *
 * Field names starting with "xesam:" or "nie:" are expanded to their full
 * ontology URIs before being compared with the document's field names. An
 * empty query (no term, no sub-queries) is delegated to getDocuments().
 *
 * @param q      Query to execute.
 * @param fields Requested field names.
 * @param types  Variant type per field; must hold at least as many entries
 *               as `fields`, otherwise nothing is returned.
 * @param result Cleared first, then filled with one row per returned hit;
 *               row[j] holds the value of fields[j] (default-constructed if
 *               the document lacks the field).
 * @param off    Index of the first hit to return; negative values count as 0.
 * @param max    Number of hits wanted; after adding `off`, a negative value
 *               or one beyond the hit count is replaced by the hit count.
 */
void
CLuceneIndexReader::getHits(const Strigi::Query& q,
        const std::vector<std::string>& fields,
        const std::vector<Strigi::Variant::Type>& types,
        std::vector<std::vector<Strigi::Variant> >& result, int off, int max) {
    result.clear();
    if (!checkReader() || types.size() < fields.size()) {
        return;
    }

    vector<string> fullFields;
    fullFields.resize(fields.size());
    for (size_t i = 0; i < fields.size(); i++) {
        if (fields[i].compare(0, 6, "xesam:") == 0) {
            fullFields[i].assign(
                "http://freedesktop.org/standards/xesam/1.0/core#"
                + fields[i].substr(6));
        } else if (fields[i].compare(0, 4, "nie:") == 0) {
            fullFields[i].assign(
                "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#"
                + fields[i].substr(4));
        } else {
            fullFields[i].assign(fields[i]);
        }
    }

    // if the query is empty, we return the number of files in the index
    if (q.term().string().size() == 0 && q.subQueries().size() == 0) {
        getDocuments(fullFields, types, result, off, max);
        return;
    }

    Query* bq = p->createQuery(q);
    IndexSearcher searcher(reader);
    Hits* hits = 0;
    int s = 0;
    try {
        hits = searcher.search(bq);
        s = hits->length();
    } catch (CLuceneError& err) {
        fprintf(stderr, "could not query: %s\n", err.what());
    }
    if (off < 0) off = 0;
    max += off;
    if (max < 0) max = s;
    if (max > s) max = s;
    // Only size the result when the range is non-empty: with off >= max the
    // difference is not positive and would turn into a huge unsigned size.
    if (max > off) {
        result.resize(max-off);
    }
    for (int i = off; i < max; ++i) {
        Document *d = &hits->doc(i);
        vector<Variant>& doc = result[i-off];
        doc.clear();
        doc.resize(fields.size());

        DocumentFieldEnumeration* e = d->fields();
        while (e->hasMoreElements()) {
            Field* field = e->nextElement();
            string name(wchartoutf8(field->name()));
            for (size_t j = 0; j < fullFields.size(); ++j) {
                if (fullFields[j] == name) {
                    doc[j] = p->getFieldValue(field, types[j]);
                }
            }
        }
        _CLDELETE(e);
    }
    if (hits) {
        _CLDELETE(hits);
    }
    searcher.close();
    _CLDELETE(bq);
}
/**
 * Fuzzy-searches one field of the Lucene index for a name.
 *
 * With `fulltext` set, every word of the escaped, sortname-normalised name
 * becomes its own fuzzy term ("table:word1~ word2~"); otherwise the words
 * are searched as one quoted phrase ("table:\"word1 word2\"~").
 *
 * A hit whose stored value has the same sortname as `name` scores 1.0; every
 * other hit is capped at 0.99. The reader and searcher members are opened on
 * first use; if the index does not exist, or `name` is empty, an empty map
 * is returned. A CLuceneError is logged and also yields whatever was
 * collected so far.
 *
 * @param table    Field to search; also used as the parser's default field.
 * @param name     Name to look for.
 * @param fulltext Selects per-word fuzzy matching instead of phrase matching.
 * @return Map from "id" to score for every hit scoring above 0.20.
 */
QMap< int, float >
FuzzyIndex::search( const QString& table, const QString& name, bool fulltext )
{
    QMutexLocker lock( &m_mutex );

    QMap< int, float > resultsmap;
    // Declared outside the try block so they are released even if CLucene throws.
    Hits* hits = 0;
    Query* qry = 0;

    try
    {
        if ( !m_luceneReader )
        {
            if ( !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ) )
            {
                qDebug() << Q_FUNC_INFO << "index didn't exist.";
                return resultsmap;
            }

            m_luceneReader = IndexReader::open( m_luceneDir );
            m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
        }

        if ( name.isEmpty() )
            return resultsmap;

        QueryParser parser( table.toStdWString().c_str(), m_analyzer );

        // Per the CLucene API, escape() returns a buffer the caller must free.
        wchar_t* escaped = parser.escape( name.toStdWString().c_str() );
        const QString escapedName = QString::fromWCharArray( escaped );
        _CLDELETE_LCARRAY( escaped );

        const QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts );
        if ( fulltext )
        {
            qry = parser.parse( QString( "%1:%2~" ).arg( table ).arg( sl.join( "~ " ) ).toStdWString().c_str() );
        }
        else
        {
            qry = parser.parse( QString( "%1:\"%2\"~" ).arg( table ).arg( sl.join( " " ) ).toStdWString().c_str() );
        }

        hits = m_luceneSearcher->search( qry );
        for ( uint i = 0; i < hits->length(); i++ )
        {
            Document* d = &hits->doc( i );

            float score = hits->score( i );
            int id = QString::fromWCharArray( d->get( _T( "id" ) ) ).toInt();
            QString result = QString::fromWCharArray( d->get( table.toStdWString().c_str() ) );

            if ( DatabaseImpl::sortname( result ) == DatabaseImpl::sortname( name ) )
                score = 1.0;
            else
                score = qMin( score, (float)0.99 );

            if ( score > 0.20 )
            {
                resultsmap.insert( id, score );
//                qDebug() << "Hitres:" << result << id << score << table << name;
            }
        }
    }
    catch( CLuceneError& error )
    {
        tDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }

    delete hits;
    delete qry;

    return resultsmap;
}