void searchPhrase(const string &indexPath, istream &in, bool fuzzy) { IndexSearcher is(indexPath); string line; while ( getline(in, line) ) { line = util::trim(line); if ( line.length() == 0 ) continue; cout << line << endl; searchPhrase(is, line, fuzzy); } }
bool LCHMFile::searchQuery( const QString& inquery, QStringList * searchresults, unsigned int limit ) { QStringList words_must_exist, words_must_not_exist, words_highlight; QVector<QStringList> phrases_must_exist; QString query = inquery; bool query_valid = true; LCHMSearchProgressResults results; int pos; int i; /* * Parse the search query with a simple state machine. * Query should consist of one of more words separated by a space with a possible prefix. * A prefix may be: * + indicates that the word is required; any page without this word is excluded from the result. * - indicates that the word is required to be absent; any page with this word is excluded from * the result. * "." indicates a phrase. Anything between quotes indicates a phrase, which is set of space-separated * words. Will be in result only if the words in phrase are in page in the same sequence, and * follow each other. * If there is no prefix, the word considered as required. */ QRegExp rxphrase( "\"(.*)\"" ); QRegExp rxword( "([^\\s]+)" ); rxphrase.setMinimal( TRUE ); // First, get the phrase queries while ( (pos = rxphrase.indexIn (query, 0)) != -1 ) { // A phrase query found. Locate its boundaries, and parse it. QStringList plist = rxphrase.cap ( 1 ).split ( QRegExp ("\\s+") ); validateWords ( plist, query_valid ); if ( plist.size() > 0 ) phrases_must_exist.push_back( plist ); query.remove (pos, rxphrase.matchedLength()); } // Then, parse the rest query while ( (pos = rxword.indexIn( query, 0 )) != -1 ) { // A phrase query found. Locate its boundaries, and parse it. QString word = rxword.cap ( 1 ); QChar type = '+'; if ( word[0] == '-' || word[0] == '+' ) { type = word[0]; word.remove (0, 1); } validateWord ( word, query_valid ); if ( type == '-' ) words_must_not_exist.push_back ( word ); else words_must_exist.push_back ( word ); query.remove (pos, rxword.matchedLength()); } #if defined (DUMP_SEARCH_QUERY) // Dump the search query QString qdump; for ( i = 0; i < phrases_must_exist.size(); i++ ) qdump += QString(" \"") + phrases_must_exist[i].join (" ") + QString ("\""); for ( i = 0; i < words_must_not_exist.size(); i++ ) qdump += QString (" -") + words_must_not_exist[i]; for ( i = 0; i < words_must_exist.size(); i++ ) qdump += QString (" +") + words_must_exist[i]; qDebug ("Search query dump: %s", qdump.ascii()); #endif // First search for phrases if ( phrases_must_exist.size() > 0 ) { LCHMSearchProgressResults tempres; for ( i = 0; i < phrases_must_exist.size(); i++ ) { if ( !searchPhrase ( impl(), phrases_must_exist[i], tempres ) ) return false; mergeResults ( results, tempres, true ); } } for ( i = 0; i < words_must_exist.size(); i++ ) { LCHMSearchProgressResults tempres; if ( !m_impl->searchWord ( words_must_exist[i], true, false, tempres, false ) ) return false; mergeResults ( results, tempres, true ); } for ( i = 0; i < words_must_not_exist.size(); i++ ) { LCHMSearchProgressResults tempres; m_impl->searchWord ( words_must_not_exist[i], true, false, tempres, false ); mergeResults ( results, tempres, false ); } m_impl->getSearchResults( results, searchresults, limit ); return true; }