/// Validates a query and extracts its terms. bool XapianEngine::validateQuery(QueryProperties& queryProps, bool includePrefixed, vector<string> &terms) { bool goodQuery = false; try { Xapian::Query fullQuery = parseQuery(NULL, queryProps, "", true); if (fullQuery.empty() == false) { for (Xapian::TermIterator termIter = fullQuery.get_terms_begin(); termIter != fullQuery.get_terms_end(); ++termIter) { // Skip prefixed terms unless instructed otherwise if ((includePrefixed == true) || (isupper((int)((*termIter)[0])) == 0)) { terms.push_back(*termIter); } } goodQuery = true; } } catch (const Xapian::Error &error) { cerr << "XapianEngine::validateQuery: " << error.get_type() << ": " << error.get_msg() << endl; } return goodQuery; }
void DataSystem::query(const std::string q) { try { // Start an enquire session. Xapian::Enquire enquire(db); Xapian::Query query = qp.parse_query(q); printf("Parsed query is: %s", query.get_description().c_str()); // Find the top 10 results for the query. enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); // Display the results. printf("%d results found.\n", matches.get_matches_estimated()); printf("Matches 1-$d:\n", matches.size()); for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { printf("%d: %d\% docid= [%s]\n\n", i.get_rank() + 1, i.get_percent(), i.get_document().get_data().c_str()); } } catch (const Xapian::Error &e) { printf("DataSystem, query error: %s", e.get_description()); } }
/// Builds the decider and records which terms to avoid, based on the
/// terms of query:
///  - a term that doesn't start with an upper-case letter is recorded as is,
///    along with its stem when a stemmer was supplied;
///  - a term starting with 'Z' is recorded without that leading character;
///  - any other term starting with an upper-case letter is ignored.
///
/// The pointers are stored as given. The set of terms to avoid is allocated
/// here with new; its release is not shown in this constructor.
TermDecider(Xapian::Database *pIndex, Xapian::Stem *pStemmer, Xapian::Stopper *pStopper,
	const string &allowedPrefixes, Xapian::Query &query) :
	Xapian::ExpandDecider(),
	m_pIndex(pIndex),
	m_pStemmer(pStemmer),
	m_pStopper(pStopper),
	m_allowedPrefixes(allowedPrefixes),
	m_pTermsToAvoid(NULL)
{
	m_pTermsToAvoid = new set<string>();

	for (Xapian::TermIterator termIter = query.get_terms_begin();
		termIter != query.get_terms_end(); ++termIter)
	{
		const string term(*termIter);

		// Go through unsigned char: isupper() on a negative char value
		// (e.g. a UTF-8 lead byte) is undefined behaviour
		if (isupper(static_cast<unsigned char>(term[0])) == 0)
		{
			m_pTermsToAvoid->insert(term);

			if (m_pStemmer != NULL)
			{
				string stem((*m_pStemmer)(term));
				m_pTermsToAvoid->insert(stem);
			}
		}
		else if (term[0] == 'Z')
		{
			// Drop the leading Z
			m_pTermsToAvoid->insert(term.substr(1));
		}
	}
#ifdef DEBUG
	cout << "TermDecider: avoiding " << m_pTermsToAvoid->size() << " terms" << endl;
#endif
}
/// Builds a Xapian query out of query properties.
///
/// pIndex              index given to the query parser when non-NULL
/// queryProps          supplies the free query text, its type and the
///                     diacritic sensitivity setting
/// stemLanguage        stemming is enabled only if this is non-empty and
///                     minimal is false; the stemmer used is m_stemmer
/// defaultOperator     DEFAULT_OP_AND selects OP_AND, anything else OP_OR
/// limitQuery          if non-empty, the free query becomes
///                     "<limitQuery> AND ( <free query> )"
/// correctedFreeQuery  receives the parser's corrected query string, only if
///                     minimal is false and spelling correction is compiled in
/// minimal             if true, stemming, wildcards and spelling correction
///                     are left out
///
/// Returns the parsed query. For query types other than XAPIAN_QP, the query
/// comes from a Xesam parser and an empty query is returned if no parser is
/// available or parsing fails.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	CJKVTokenizer tokenizer;
	string freeQuery(queryProps.getFreeQuery());
	unsigned int tokensCount = 1;
	bool diacriticSensitive = queryProps.getDiacriticSensitive();

	// Modifying the query is necessary if it's CJKV or diacritics are off
	if ((tokenizer.has_cjkv(freeQuery) == true) ||
		(diacriticSensitive == false))
	{
		QueryModifier handler(freeQuery, diacriticSensitive, tokenizer.get_ngram_size());

		tokenizer.tokenize(freeQuery, handler, true);

		tokensCount = handler.get_tokens_count();

		// We can disable stemming and spelling correction for pure CJKV queries
		// NOTE(review): minimal is handed over as an lvalue; presumably the
		// handler may switch it on for such queries - confirm against QueryModifier
		string cjkvQuery(handler.get_modified_query(minimal));
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
	}
	else
	{
		// Count tokens : one more for every space that isn't the last character
		string::size_type spacePos = freeQuery.find(' ');
		while (spacePos != string::npos)
		{
			++tokensCount;

			if (spacePos + 1 >= freeQuery.length())
			{
				break;
			}

			// Next
			spacePos = freeQuery.find(' ', spacePos + 1);
		}
	}
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << tokensCount << " tokens" << endl;
#endif

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
		parser.set_stemmer(m_stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

		// Don't bother loading the stopwords list if there's only one token
		if (tokensCount > 1)
		{
			FileStopper *pStopper = FileStopper::get_stopper(Languages::toCode(stemLanguage));
			if ((pStopper != NULL) &&
				(pStopper->get_stopwords_count() > 0))
			{
				parser.set_stopper(pStopper);
			}
		}
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}

	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}

#if XAPIAN_NUM_VERSION >= 1000004
	// Search across text body and title
	parser.add_prefix("", "");
	parser.add_prefix("", "S");
#endif
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("inurl", "XFILE:");
	parser.add_prefix("path", "XPATH:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// The range processors below are locals registered by address :
	// they have to stay alive until the query has been parsed

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;
		bool parsedQuery = false;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			try
			{
				parsedQuery = pParser->parse(freeQuery, builder);
			}
			catch (...)
			{
				// Don't leak the parser if parsing throws
				delete pParser;
				throw;
			}

			delete pParser;
		}

		if (parsedQuery == true)
		{
			return builder.get_query();
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values,
	// ie name:"value", and replace them with name:value
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		// Look for the closing quote
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Replace everything from the opening to the closing quote
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}
/// Runs a query; true if success. bool XapianEngine::runQuery(QueryProperties& queryProps, unsigned int startDoc) { string stemLanguage(Languages::toEnglish(queryProps.getStemmingLanguage())); // Clear the results list m_resultsList.clear(); m_resultsCountEstimate = 0; m_correctedFreeQuery.clear(); if (queryProps.isEmpty() == true) { #ifdef DEBUG cout << "XapianEngine::runQuery: query is empty" << endl; #endif return false; } XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true); if (pDatabase == NULL) { cerr << "Couldn't get index " << m_databaseName << endl; return false; } if (stemLanguage.empty() == false) { #ifdef DEBUG cout << "XapianEngine::runQuery: " << stemLanguage << " stemming" << endl; #endif try { m_stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage)); } catch (const Xapian::Error &error) { cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl; } } // Get the latest revision... pDatabase->reopen(); Xapian::Database *pIndex = pDatabase->readLock(); try { unsigned int searchStep = 1; // Searches are run in this order : // 1. no stemming, exact matches only // 2. 
stem terms if a language is defined for the query Xapian::Query fullQuery = parseQuery(pIndex, queryProps, "", m_defaultOperator, m_limitQuery, m_correctedFreeQuery); while (fullQuery.empty() == false) { // Query the database if (queryDatabase(pIndex, fullQuery, stemLanguage, startDoc, queryProps) == false) { break; } if (m_resultsList.empty() == true) { // The search did succeed but didn't return anything if ((searchStep == 1) && (stemLanguage.empty() == false)) { #ifdef DEBUG cout << "XapianEngine::runQuery: trying again with stemming" << endl; #endif fullQuery = parseQuery(pIndex, queryProps, stemLanguage, m_defaultOperator, m_limitQuery, m_correctedFreeQuery); ++searchStep; continue; } } else { // We have results, don't bother about correcting the query m_correctedFreeQuery.clear(); } pDatabase->unlock(); return true; } } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } pDatabase->unlock(); return false; }
/// Runs a query; true if success. bool XapianEngine::runQuery(QueryProperties& queryProps) { // Clear the results list m_resultsList.clear(); XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true); if (pDatabase == NULL) { return false; } // Get the latest revision... pDatabase->reopen(); Xapian::Database *pIndex = pDatabase->readLock(); try { string stemLanguage; unsigned int searchStep = 1; bool followOperators = true; // Searches are run in this order : // 1. follow operators and don't stem terms // 2. if no results, follow operators and stem terms // 3. if no results, don't follow operators and don't stem terms // 4. if no results, don't follow operators and stem terms // Steps 2 and 4 depend on a language being defined for the query Xapian::Query fullQuery = parseQuery(pIndex, queryProps, "", followOperators); while (fullQuery.empty() == false) { #ifdef DEBUG cout << "XapianEngine::runQuery: " << fullQuery.get_description() << endl; #endif // Query the database if (queryDatabase(pIndex, fullQuery) == false) { break; } if (m_resultsList.empty() == true) { // The search did succeed but didn't return anything // Try the next step switch (++searchStep) { case 2: followOperators = true; stemLanguage = queryProps.getLanguage(); if (stemLanguage.empty() == false) { break; } ++searchStep; case 3: followOperators = false; stemLanguage.clear(); break; case 4: followOperators = false; stemLanguage = queryProps.getLanguage(); if (stemLanguage.empty() == false) { break; } ++searchStep; default: pDatabase->unlock(); return true; } #ifdef DEBUG cout << "XapianEngine::runQuery: trying step " << searchStep << endl; #endif fullQuery = parseQuery(pIndex, queryProps, Languages::toEnglish(stemLanguage), followOperators); continue; } pDatabase->unlock(); return true; } } catch (const Xapian::Error &error) { cerr << "XapianEngine::runQuery: " << error.get_type() << ": " << error.get_msg() << endl; } pDatabase->unlock(); return false; }
/// Runs query against the index named m_databaseName.
///
/// Up to m_maxResultsCount matches are retrieved. For each match, a Result is
/// appended to m_resultsList, built from fields of the document's data
/// ("caption=", "url=", "language=", "sample="). If there's no sample, a
/// summary is generated from the query's terms.
///
/// m_expandTerms is cleared, then, if m_relevantDocuments isn't empty, filled
/// with up to 10 suggested terms that don't start with an upper-case letter.
///
/// Returns true if the query ran without Xapian raising an error; false if
/// the index couldn't be obtained or an error was raised (logged to cerr).
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
			if (matches.empty() == false)
			{
				multimap<Xapian::weight, string> queryTerms;
				vector<string> seedTerms;
				Xapian::weight maxWeight = matches.get_max_attained();

				// Sort query terms by weight
				// The key is the distance to the maximum weight so that
				// the heaviest terms come first in the multimap
				for (Xapian::TermIterator termIter = query.get_terms_begin();
					termIter != query.get_terms_end(); ++termIter)
				{
					string termName(*termIter);
					Xapian::weight termWeight = maxWeight - matches.get_termweight(termName);

					queryTerms.insert(pair<Xapian::weight, string>(termWeight, termName));
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: term " << termName
						<< " has weight " << matches.get_termweight(termName) << "/" << maxWeight << endl;
#endif
				}

				for (multimap<Xapian::weight, string>::iterator weightIter = queryTerms.begin();
					weightIter != queryTerms.end(); ++weightIter)
				{
					seedTerms.push_back(weightIter->second);
				}

				// Get the results
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated()
					<< "/" << m_maxResultsCount << " results found" << endl;
#endif
				for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
				{
					Xapian::docid docId = *mIter;
					Xapian::Document doc(mIter.get_document());
					string record = doc.get_data();

					// Get the title
					string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
					// Get the URL
					string url = StringManip::extractField(record, "url=", "\n");
					if (url.empty() == true)
					{
						// Hmmm this shouldn't be empty...
						// Use this instead, even though the document isn't cached in the index
						url = XapianDatabase::buildUrl(m_databaseName, docId);
					}
					else
					{
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
						url = Url::canonicalizeUrl(url);
					}

					// Get the language, if available
					string language = StringManip::extractField(record, "language=", "\n");

					// Finally, get a summary
					string summary = StringManip::extractField(record, "sample=", "\n");
					if (summary.empty() == true)
					{
						AbstractGenerator abstractGen(pIndex, 50);

						// Generate an abstract based on the query's terms
						summary = abstractGen.generateAbstract(seedTerms, docId);
					}

					// Add this result
					Result thisResult(url, title, summary, language, static_cast<float>(mIter.get_percent()));
					m_resultsList.push_back(thisResult);
				}
			}

			m_expandTerms.clear();

			// Expand the query ?
			if (m_relevantDocuments.empty() == false)
			{
				Xapian::RSet relevantDocs;
				unsigned int count = 0;

				for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
					docIter != m_relevantDocuments.end(); ++docIter)
				{
					relevantDocs.add_document(*docIter);
				}

				// Get 10 non-prefixed terms
				// Twice as many are requested as some are likely to be prefixed
				Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
				for (Xapian::ESetIterator termIter = expandTerms.begin();
					(termIter != expandTerms.end()) && (count < 10); ++termIter)
				{
					const string expandTerm(*termIter);

					// Go through unsigned char: isupper() on a negative char value
					// (e.g. a UTF-8 lead byte) is undefined behaviour
					if (isupper(static_cast<unsigned char>(expandTerm[0])) == 0)
					{
						m_expandTerms.insert(expandTerm);
						++count;
					}
				}
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
		}
		catch (...)
		{
			// Anything else is for the caller to handle, but don't keep the lock
			pDatabase->unlock();
			throw;
		}
	}
	pDatabase->unlock();

	return bStatus;
}
int main(int argc, char **argv) { if(argc < 2) { usage(argv); return 1; } try { char *action = argv[1]; char *db_path = argv[2]; if(!strcmp(action, "index")) { Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN); Xapian::TermGenerator indexer; Xapian::Stem stemmer("english"); indexer.set_stemmer(stemmer); std::string doc_txt; while(true) { if(std::cin.eof()) break; std::string line; getline(std::cin, line); doc_txt += line; } if(!doc_txt.empty()) { Xapian::Document doc; doc.set_data(doc_txt); indexer.set_document(doc); indexer.index_text(doc_txt); db.add_document(doc); std::cout << "Indexed: " << indexer.get_description() << std::endl; } db.commit(); } else if(!strcmp(action, "search")) { if(argc < 4) { std::cerr << "You must supply a query string" << std::endl; return 1; } Xapian::Database db(db_path); Xapian::Enquire enquire(db); std::string query_str = argv[3]; argv+= 4; while(*argv) { query_str += ' '; query_str += *argv++; } Xapian::QueryParser qp; Xapian::Stem stemmer("english"); qp.set_stemmer(stemmer); qp.set_database(db); qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); Xapian::Query query = qp.parse_query(query_str); std::cout << "Parsed query is: " << query.get_description() << std::endl; enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); std::cout << matches.get_matches_estimated() << " results found.\n"; std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" << *i << " [" << i.get_document().get_data()<< "]" << std::endl << std::endl; } } else { std::cerr << "Invalid action " << action << std::endl; usage(argv); return 1; } } catch (const Xapian::Error &error) { std::cout << "Exception: " << error.get_msg() << std::endl; } }
/// Builds a Xapian query out of query properties.
///
/// pIndex              index given to the query parser when non-NULL
/// queryProps          supplies the free query text (newlines are replaced
///                     with spaces) and the query type
/// stemLanguage        stemming is enabled only if this is non-empty and
///                     minimal is false
/// defaultOperator     DEFAULT_OP_AND selects OP_AND, anything else OP_OR
/// limitQuery          if non-empty, the free query becomes
///                     "<limitQuery> AND ( <free query> )"
/// correctedFreeQuery  receives the parser's corrected query string, only if
///                     minimal is false and spelling correction is compiled in
/// minimal             if true, stemming, wildcards and spelling correction
///                     are left out; forced to true for CJKV-only queries
///
/// Returns the parsed query. For query types other than XAPIAN_QP, the query
/// comes from a Xesam parser and an empty query is returned if no parser is
/// available or parsing fails.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	CJKVTokenizer tokenizer;
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));

	if (tokenizer.has_cjkv_only(freeQuery) == true)
	{
		vector<string> tokens;
		string cjkvQuery;

		tokenizer.tokenize(freeQuery, tokens);

		// Get the terms
		for (vector<string>::const_iterator tokenIter = tokens.begin();
			tokenIter != tokens.end(); ++tokenIter)
		{
			cjkvQuery += *tokenIter;
			cjkvQuery += " ";
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
		// We can disable stemming and spelling correction
		minimal = true;
	}

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << stemLanguage << " stemming" << endl;
#endif
		try
		{
			stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		}
		catch (const Xapian::Error &error)
		{
			// Not fatal, the default-constructed stemmer is used instead
			cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
		}

		parser.set_stemmer(stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}

	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}

	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// The range processors below are locals registered by address :
	// they have to stay alive until the query has been parsed

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			bool parsedQuery = false;

			try
			{
				parsedQuery = pParser->parse(freeQuery, builder);
			}
			catch (...)
			{
				// Don't leak the parser if parsing throws
				delete pParser;
				throw;
			}

			delete pParser;

			if (parsedQuery == true)
			{
				return builder.get_query();
			}
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values,
	// ie name:"value", and replace them with name:value
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		// Look for the closing quote
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Replace everything from the opening to the closing quote
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
		Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}