Ejemplo n.º 1
0
void searchPhrase(const string &indexPath, istream &in, bool fuzzy) {
	IndexSearcher is(indexPath);

	string line;
	while ( getline(in, line) ) {
		line = util::trim(line);
		if ( line.length() == 0 ) continue;
		cout << line << endl;
		searchPhrase(is, line, fuzzy);
	}
}	
Ejemplo n.º 2
0
/**
 * Parses a search query, runs every part of it and collects the results.
 *
 * The query is split into quoted phrases, required words and excluded words
 * (see the syntax description inside the function). Each phrase and each
 * required word is searched and merged into the running result set with the
 * "add" flag set; each excluded word is searched and merged with the flag
 * cleared. The final set is handed to m_impl->getSearchResults().
 *
 * Empty tokens (an empty phrase, padding inside the quotes, a bare "+" or "-")
 * are ignored rather than searched.
 *
 * @param inquery       the raw query text; it is copied, not modified
 * @param searchresults output list, forwarded to m_impl->getSearchResults()
 * @param limit         forwarded to m_impl->getSearchResults()
 * @return false as soon as searching a phrase or a required word fails,
 *         true otherwise
 */
bool LCHMFile::searchQuery( const QString& inquery, QStringList * searchresults, unsigned int limit )
{
	QStringList words_must_exist, words_must_not_exist;
	QVector<QStringList> phrases_must_exist;
	QString query = inquery;
	// NOTE(review): this flag is handed to validateWord()/validateWords(), but
	// nothing in this function reads it afterwards - confirm whether an invalid
	// query is supposed to be rejected.
	bool query_valid = true;
	LCHMSearchProgressResults results;
	int pos;

	/*
	 * Parse the search query with a simple state machine.
	 * Query should consist of one or more words separated by a space with a possible prefix.
	 * A prefix may be:
	 *   +     indicates that the word is required; any page without this word is excluded from the result.
	 *   -     indicates that the word is required to be absent; any page with this word is excluded from
	 *         the result.
	 *   "..." indicates a phrase. Anything between quotes indicates a phrase, which is a set of
	 *         space-separated words. Will be in result only if the words in phrase are in page in the
	 *         same sequence, and follow each other.
	 *   If there is no prefix, the word is considered required.
	 */

	QRegExp rxphrase( "\"(.*)\"" );
	QRegExp rxword( "([^\\s]+)" );
	rxphrase.setMinimal( true );	// non-greedy, so each pair of quotes is matched separately

	// First, get the phrase queries
	while ( (pos = rxphrase.indexIn (query, 0)) != -1 )
	{
		// A phrase query found. Cut it out of the query, then parse it.
		QStringList plist = rxphrase.cap ( 1 ).split ( QRegExp ("\\s+") );
		query.remove (pos, rxphrase.matchedLength());

		validateWords ( plist, query_valid );

		// split() keeps empty fields by default, so an empty phrase or one padded
		// with whitespace yields empty entries. Drop them (together with any entry
		// validateWords() may have left empty) so the emptiness check below is real.
		plist.removeAll ( QString() );

		if ( !plist.isEmpty() )
			phrases_must_exist.push_back( plist );
	}

	// Then, parse the rest of the query word by word
	while ( (pos = rxword.indexIn( query, 0 )) != -1 )
	{
		// A word found. Cut it out of the query first, so the loop always advances.
		QString word = rxword.cap ( 1 );
		query.remove (pos, rxword.matchedLength());

		QChar type = '+';	// no prefix means the word is required

		if ( word[0] == '-' || word[0] == '+' )
		{
			type = word[0];
			word.remove (0, 1);
		}

		validateWord ( word, query_valid );

		// A bare "+" or "-" (or a token left empty by validateWord()) has nothing to search for.
		if ( word.isEmpty() )
			continue;

		if ( type == '-' )
			words_must_not_exist.push_back ( word );
		else
			words_must_exist.push_back ( word );
	}

#if defined (DUMP_SEARCH_QUERY)
	// Dump the search query
	QString qdump;
	for ( int i = 0; i < phrases_must_exist.size(); i++ )
		qdump += QString(" \"") + phrases_must_exist[i].join (" ") + QString ("\"");

	for ( int i = 0; i < words_must_not_exist.size(); i++ )
		qdump += QString (" -") + words_must_not_exist[i];
	
	for ( int i = 0; i < words_must_exist.size(); i++ )
		qdump += QString (" +") + words_must_exist[i];

	qDebug ("Search query dump: %s", qdump.ascii());
#endif

	// First search for phrases
	for ( int i = 0; i < phrases_must_exist.size(); i++ )
	{
		// Fresh buffer per phrase, as in the word loops below, so that the hits
		// of one phrase cannot carry over into the merge of the next one.
		LCHMSearchProgressResults tempres;

		if ( !searchPhrase ( impl(), phrases_must_exist[i], tempres ) )
			return false;

		mergeResults ( results, tempres, true );
	}

	// Then the required words
	for ( int i = 0; i < words_must_exist.size(); i++ )
	{
		LCHMSearchProgressResults tempres;

		if ( !m_impl->searchWord ( words_must_exist[i], true, false, tempres, false ) )
			return false;

		mergeResults ( results, tempres, true );
	}

	// Finally the excluded words
	for ( int i = 0; i < words_must_not_exist.size(); i++ )
	{
		LCHMSearchProgressResults tempres;

		// The return value is not checked here: unlike for required words,
		// a failed search of an excluded word does not abort the query.
		m_impl->searchWord ( words_must_not_exist[i], true, false, tempres, false );
		mergeResults ( results, tempres, false );
	}

	m_impl->getSearchResults( results, searchresults, limit );
	return true;
}