Ejemplo n.º 1
0
/// Validates a query and extracts its terms.
bool XapianEngine::validateQuery(QueryProperties& queryProps, bool includePrefixed,
	vector<string> &terms)
{
	bool goodQuery = false;

	try
	{
		Xapian::Query fullQuery = parseQuery(NULL, queryProps, "", true);
		if (fullQuery.empty() == false)
		{
			for (Xapian::TermIterator termIter = fullQuery.get_terms_begin();
				termIter != fullQuery.get_terms_end(); ++termIter)
			{
				// Skip prefixed terms unless instructed otherwise
				if ((includePrefixed == true) ||
					(isupper((int)((*termIter)[0])) == 0))
				{
					terms.push_back(*termIter);
				}
			}

			goodQuery = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "XapianEngine::validateQuery: " << error.get_type() << ": " << error.get_msg() << endl;
	}

	return goodQuery;
}
Ejemplo n.º 2
0
void DataSystem::query(const std::string q)
{
	try {
		// Start an enquire session.
		Xapian::Enquire enquire(db);

		Xapian::Query query = qp.parse_query(q);
		printf("Parsed query is: %s", query.get_description().c_str());

		// Find the top 10 results for the query.
		enquire.set_query(query);
		Xapian::MSet matches = enquire.get_mset(0, 10);

		// Display the results.
		printf("%d results found.\n", matches.get_matches_estimated());
		printf("Matches 1-$d:\n", matches.size());

		for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
	{
			printf("%d: %d\% docid= [%s]\n\n",
				   i.get_rank() + 1,
				   i.get_percent(),
				   i.get_document().get_data().c_str());
		}
	} catch (const Xapian::Error &e)
	{
		printf("DataSystem, query error: %s", e.get_description());
	}
}
Ejemplo n.º 3
0
		TermDecider(Xapian::Database *pIndex,
			Xapian::Stem *pStemmer,
			Xapian::Stopper *pStopper,
			const string &allowedPrefixes,
			Xapian::Query &query) :
			Xapian::ExpandDecider(),
			m_pIndex(pIndex),
			m_pStemmer(pStemmer),
			m_pStopper(pStopper),
			m_allowedPrefixes(allowedPrefixes),
			m_pTermsToAvoid(NULL)
		{
			m_pTermsToAvoid = new set<string>();

			for (Xapian::TermIterator termIter = query.get_terms_begin();
				termIter != query.get_terms_end(); ++termIter)
			{
				string term(*termIter);

				if (isupper((int)(term[0])) == 0)
				{
					m_pTermsToAvoid->insert(term);
					if (m_pStemmer != NULL)
					{
						string stem((*m_pStemmer)(term));
						m_pTermsToAvoid->insert(stem);
					}
				}
				else if (term[0] == 'Z')
				{
					m_pTermsToAvoid->insert(term.substr(1));
				}
			}
#ifdef DEBUG
			cout << "TermDecider: avoiding " << m_pTermsToAvoid->size() << " terms" << endl;
#endif
		}
Ejemplo n.º 4
0
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	CJKVTokenizer tokenizer;
	string freeQuery(queryProps.getFreeQuery());
	unsigned int tokensCount = 1;
	bool diacriticSensitive = queryProps.getDiacriticSensitive();

	// Modifying the query is necessary if it's CJKV or diacritics are off
	if ((tokenizer.has_cjkv(freeQuery) == true) ||
		(diacriticSensitive == false))
	{
		QueryModifier handler(freeQuery,
			diacriticSensitive,
			tokenizer.get_ngram_size());

		tokenizer.tokenize(freeQuery, handler, true);

		tokensCount = handler.get_tokens_count();

		// We can disable stemming and spelling correction for pure CJKV queries
		string cjkvQuery(handler.get_modified_query(minimal));
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
	}
	else
	{
		string::size_type spacePos = freeQuery.find(' ');
		while (spacePos != string::npos)
		{
			++tokensCount;

			if (spacePos + 1 >= freeQuery.length())
			{
				break;
			}

			// Next
			spacePos = freeQuery.find(' ', spacePos + 1);
		}
	}
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << tokensCount << " tokens" << endl;
#endif

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
		parser.set_stemmer(m_stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

		// Don't bother loading the stopwords list if there's only one token
		if (tokensCount > 1)
		{
			FileStopper *pStopper = FileStopper::get_stopper(Languages::toCode(stemLanguage));
			if ((pStopper != NULL) &&
				(pStopper->get_stopwords_count() > 0))
			{
				parser.set_stopper(pStopper);
			}
		}
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
#if XAPIAN_NUM_VERSION >= 1000004
	// Search across text body and title
	parser.add_prefix("", "");
	parser.add_prefix("", "S");
#endif
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("inurl", "XFILE:");
	parser.add_prefix("path", "XPATH:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes 
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;
		bool parsedQuery = false;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			parsedQuery = pParser->parse(freeQuery, builder);

			delete pParser;
		}

		if (parsedQuery == true)
		{
			return builder.get_query();
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart,
				escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}
Ejemplo n.º 5
0
/// Runs a query; true if success.
bool XapianEngine::runQuery(QueryProperties& queryProps,
	unsigned int startDoc)
{
	string stemLanguage(Languages::toEnglish(queryProps.getStemmingLanguage()));

	// Clear the results list
	m_resultsList.clear();
	m_resultsCountEstimate = 0;
	m_correctedFreeQuery.clear();

	if (queryProps.isEmpty() == true)
	{
#ifdef DEBUG
		cout << "XapianEngine::runQuery: query is empty" << endl;
#endif
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		cerr << "Couldn't get index " << m_databaseName << endl;
		return false;
	}

	if (stemLanguage.empty() == false)
	{
#ifdef DEBUG
		cout << "XapianEngine::runQuery: " << stemLanguage << " stemming" << endl;
#endif
		try
		{
			m_stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		}
		catch (const Xapian::Error &error)
		{
			cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
		}
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	try
	{
		unsigned int searchStep = 1;

		// Searches are run in this order :
		// 1. no stemming, exact matches only
		// 2. stem terms if a language is defined for the query
		Xapian::Query fullQuery = parseQuery(pIndex, queryProps, "",
			m_defaultOperator, m_limitQuery, m_correctedFreeQuery);
		while (fullQuery.empty() == false)
		{
			// Query the database
			if (queryDatabase(pIndex, fullQuery, stemLanguage, startDoc, queryProps) == false)
			{
				break;
			}

			if (m_resultsList.empty() == true)
			{
				// The search did succeed but didn't return anything
				if ((searchStep == 1) &&
					(stemLanguage.empty() == false))
				{
#ifdef DEBUG
					cout << "XapianEngine::runQuery: trying again with stemming" << endl;
#endif
					fullQuery = parseQuery(pIndex, queryProps, stemLanguage,
						m_defaultOperator, m_limitQuery, m_correctedFreeQuery);
					++searchStep;
					continue;
				}
			}
			else
			{
				// We have results, don't bother about correcting the query
				m_correctedFreeQuery.clear();
			}

			pDatabase->unlock();
			return true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	pDatabase->unlock();

	return false;
}
Ejemplo n.º 6
0
/// Runs a query; true if success.
bool XapianEngine::runQuery(QueryProperties& queryProps)
{
	// Clear the results list
	m_resultsList.clear();

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	try
	{
		string stemLanguage;
		unsigned int searchStep = 1;
		bool followOperators = true;

		// Searches are run in this order :
		// 1. follow operators and don't stem terms
		// 2. if no results, follow operators and stem terms
		// 3. if no results, don't follow operators and don't stem terms
		// 4. if no results, don't follow operators and stem terms
		// Steps 2 and 4 depend on a language being defined for the query
		Xapian::Query fullQuery = parseQuery(pIndex, queryProps, "", followOperators);
		while (fullQuery.empty() == false)
		{
#ifdef DEBUG
			cout << "XapianEngine::runQuery: " << fullQuery.get_description() << endl;
#endif
			// Query the database
			if (queryDatabase(pIndex, fullQuery) == false)
			{
				break;
			}

			if (m_resultsList.empty() == true)
			{
				// The search did succeed but didn't return anything
				// Try the next step
				switch (++searchStep)
				{
					case 2:
						followOperators = true;
						stemLanguage = queryProps.getLanguage();
						if (stemLanguage.empty() == false)
						{
							break;
						}
						++searchStep;
					case 3:
						followOperators = false;
						stemLanguage.clear();
						break;
					case 4:
						followOperators = false;
						stemLanguage = queryProps.getLanguage();
						if (stemLanguage.empty() == false)
						{
							break;
						}
						++searchStep;
					default:
						pDatabase->unlock();
						return true;
				}

#ifdef DEBUG
				cout << "XapianEngine::runQuery: trying step " << searchStep << endl;
#endif
				fullQuery = parseQuery(pIndex, queryProps,
					Languages::toEnglish(stemLanguage), followOperators);
				continue;
			}

			pDatabase->unlock();
			return true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "XapianEngine::runQuery: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	pDatabase->unlock();

	return false;
}
Ejemplo n.º 7
0
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
			if (matches.empty() == false)
			{
				multimap<Xapian::weight, string> queryTerms;
				vector<string> seedTerms;
				Xapian::weight maxWeight = matches.get_max_attained();

				// Sort query terms by weight
				for (Xapian::TermIterator termIter = query.get_terms_begin();
					termIter != query.get_terms_end(); ++termIter)
				{
					string termName(*termIter);
					Xapian::weight termWeight = maxWeight - matches.get_termweight(termName);

					queryTerms.insert(pair<Xapian::weight, string>(termWeight, termName));
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: term " << termName
						<< " has weight " << matches.get_termweight(termName) << "/" << maxWeight << endl;
#endif
				}

				for (multimap<Xapian::weight, string>::iterator weightIter = queryTerms.begin();
					weightIter != queryTerms.end(); ++weightIter)
				{
					seedTerms.push_back(weightIter->second);
				}

				// Get the results
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif
				for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
				{
					Xapian::docid docId = *mIter;
					Xapian::Document doc(mIter.get_document());
					string record = doc.get_data();

					// Get the title
					string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
					// Get the URL
					string url = StringManip::extractField(record, "url=", "\n");
					if (url.empty() == true)
					{
						// Hmmm this shouldn't be empty...
						// Use this instead, even though the document isn't cached in the index
						url = XapianDatabase::buildUrl(m_databaseName, *mIter);
					}
					else
					{
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
						url = Url::canonicalizeUrl(url);
					}

					// Get the type
					string type = StringManip::extractField(record, "type=", "\n");
					// ...and the language, if available
					string language = StringManip::extractField(record, "language=", "\n");

					// Finally, get a summary
					string summary = StringManip::extractField(record, "sample=", "\n");
					if (summary.empty() == true)
					{
						AbstractGenerator abstractGen(pIndex, 50);

						// Generate an abstract based on the query's terms
						summary = abstractGen.generateAbstract(seedTerms, docId);
					}

					// Add this result
					Result thisResult(url, title, summary, language, (float)mIter.get_percent());
					m_resultsList.push_back(thisResult);
				}
			}

			m_expandTerms.clear();

			// Expand the query ?
			if (m_relevantDocuments.empty() == false)
			{
				Xapian::RSet relevantDocs;
				unsigned int count = 0;

				for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
					docIter != m_relevantDocuments.end(); ++docIter)
				{
					relevantDocs.add_document(*docIter);
				}

				// Get 10 non-prefixed terms
				Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
				for (Xapian::ESetIterator termIter = expandTerms.begin();
					(termIter != expandTerms.end()) && (count < 10); ++termIter)
				{
					if (isupper((int)((*termIter)[0])) == 0)
					{
						m_expandTerms.insert(*termIter);
						++count;
					}
				}
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
		}
	}
	pDatabase->unlock();

	return bStatus;
}
Ejemplo n.º 8
0
int main(int argc, char **argv)
{
    if(argc < 2) {
        usage(argv);
        return 1;
    }

    try {
        char *action = argv[1];
        char *db_path = argv[2];

        if(!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);

            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            std::string doc_txt;
            while(true) {
                if(std::cin.eof()) break;

                std::string line;
                getline(std::cin, line);
                doc_txt += line;
            }

            if(!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);

                indexer.set_document(doc);
                indexer.index_text(doc_txt);

                db.add_document(doc);

                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }

            db.commit();
        } else if(!strcmp(action, "search")) {
            if(argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            std::string query_str = argv[3];
            argv+= 4;
            while(*argv) {
                query_str += ' ';
                query_str += *argv++;
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                         std::endl;

            enquire.set_query(query);
            Xapian::MSet matches = enquire.get_mset(0, 10);

            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;

            for (Xapian::MSetIterator i = matches.begin();
                    i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                        "% docid=" << *i << " [" <<
                        i.get_document().get_data()<< "]" << std::endl <<
                        std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }

    } catch (const Xapian::Error &error) {
        std::cout << "Exception: " << error.get_msg() << std::endl;
    }
}
Ejemplo n.º 9
0
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	CJKVTokenizer tokenizer;
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
	unsigned int minDay, minMonth, minYear = 0;
	unsigned int maxDay, maxMonth, maxYear = 0;

	if (tokenizer.has_cjkv_only(freeQuery) == true)
	{
		vector<string> tokens;
		string cjkvQuery;

		tokenizer.tokenize(freeQuery, tokens);

		// Get the terms
		for (vector<string>::const_iterator tokenIter = tokens.begin();
			tokenIter != tokens.end(); ++tokenIter)
		{
			cjkvQuery += *tokenIter;
			cjkvQuery += " ";
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
		// We can disable stemming and spelling correction
		minimal = true;
	}

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << stemLanguage << " stemming" << endl;
#endif
		try
		{
			stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		}
		catch (const Xapian::Error &error)
		{
			cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
		}
		parser.set_stemmer(stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes 
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			bool parsedQuery = pParser->parse(freeQuery, builder);

			delete pParser;

			if (parsedQuery == true)
			{
				return builder.get_query();
			}
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart,
				escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
		Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{

#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}