/**
 * Turns the user's query into a Xapian::Query.
 *
 * @param pIndex index handed to the query parser when not NULL; the parser
 *        needs it for wildcards and spelling.
 * @param queryProps source of the free query text, the diacritic sensitivity
 *        setting and the query type.
 * @param stemLanguage stemming language; when empty, or when minimal is true,
 *        stemming is switched off.
 * @param defaultOperator DEFAULT_OP_AND selects OP_AND, anything else OP_OR.
 * @param limitQuery when not empty, the query becomes
 *        "limitQuery AND ( query )".
 * @param correctedFreeQuery receives the parser's corrected query string when
 *        spelling correction is compiled in and minimal is false.
 * @param minimal when true, stemming, wildcards and spelling correction are
 *        not enabled.
 * @return the parsed query; an empty query when the query is of a Xesam type
 *         and could not be parsed.
 */
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	CJKVTokenizer tokenizer;
	string freeQuery(queryProps.getFreeQuery());
	unsigned int tokensCount = 1;
	bool diacriticSensitive = queryProps.getDiacriticSensitive();

	// Modifying the query is necessary if it's CJKV or diacritics are off
	if ((tokenizer.has_cjkv(freeQuery) == true) ||
		(diacriticSensitive == false))
	{
		QueryModifier handler(freeQuery, diacriticSensitive, tokenizer.get_ngram_size());

		tokenizer.tokenize(freeQuery, handler, true);
		tokensCount = handler.get_tokens_count();

		// We can disable stemming and spelling correction for pure CJKV queries
		string cjkvQuery(handler.get_modified_query(minimal));
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
	}
	else
	{
		// Estimate the number of tokens : one more than the number of spaces
		string::size_type spacePos = freeQuery.find(' ');
		while (spacePos != string::npos)
		{
			++tokensCount;

			if (spacePos + 1 >= freeQuery.length())
			{
				break;
			}

			// Next
			spacePos = freeQuery.find(' ', spacePos + 1);
		}
	}
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << tokensCount << " tokens" << endl;
#endif

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
		parser.set_stemmer(m_stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

		// Don't bother loading the stopwords list if there's only one token
		if (tokensCount > 1)
		{
			FileStopper *pStopper = FileStopper::get_stopper(Languages::toCode(stemLanguage));
			if ((pStopper != NULL) &&
				(pStopper->get_stopwords_count() > 0))
			{
				parser.set_stopper(pStopper);
			}
		}
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}

	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}

#if XAPIAN_NUM_VERSION >= 1000004
	// Search across text body and title
	parser.add_prefix("", "");
	parser.add_prefix("", "S");
#endif
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("inurl", "XFILE:");
	parser.add_prefix("path", "XPATH:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;
		bool parsedQuery = false;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			try
			{
				parsedQuery = pParser->parse(freeQuery, builder);
			}
			catch (...)
			{
				// Don't leak the parser if parsing throws
				delete pParser;
				throw;
			}

			delete pParser;
		}

		if (parsedQuery == true)
		{
			return builder.get_query();
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Substitute the quoted value, quotes included, with the processed one
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}
void query(const std::vector<std::string>& arr_queries, const std::vector<std::string>& arr_selection = {}) { Xapian::Database databases_ir; try { Xapian::Database database_ir_object_values(path_database+"object_values/"); databases_ir.add_database(database_ir_object_values); } catch (const Xapian::Error &e) { // Database not ready } try { Xapian::Database database_ir_object_descriptions(path_database+"object_descriptions/"); databases_ir.add_database(database_ir_object_descriptions); } catch (const Xapian::Error &e) { // Database not ready } try { Xapian::Database database_ir_object_sub_descriptions(path_database+"object_sub_descriptions/"); databases_ir.add_database(database_ir_object_sub_descriptions); } catch (const Xapian::Error &e) { // Database not ready } // Filter on Type IDs Xapian::Query query_ir_identifiers; if (!arr_selection.empty()) { std::vector<Xapian::Query> arr_query_identifiers; for (const auto& str_identifier_field : arr_selection) { arr_query_identifiers.push_back(Xapian::Query("T"+str_identifier_field)); } query_ir_identifiers = Xapian::Query(Xapian::Query::OP_OR, arr_query_identifiers.begin(), arr_query_identifiers.end()); } Xapian::QueryParser queryparser; queryparser.set_database(databases_ir); // Needed to enable specific query flags queryparser.set_stemmer(Xapian::Stem("en")); queryparser.set_stemming_strategy(queryparser.STEM_SOME); queryparser.add_boolean_prefix("identifier", "T"); //queryparser.add_prefix("value", "SV"); unsigned int count_queries = 0; for (const auto& str_query : arr_queries) { const auto query_id = count_queries; count_queries++; Xapian::Query query_ir = queryparser.parse_query(str_query, Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_WILDCARD); if (!arr_selection.empty()) { // Update main query query_ir = Xapian::Query(Xapian::Query::OP_FILTER, query_ir, query_ir_identifiers); } // Run query Xapian::Enquire enquire(databases_ir); enquire.set_query(query_ir); Xapian::MSet arr_msets = 
enquire.get_mset(num_offset, num_limit); for (Xapian::MSetIterator iterate_arr_mset = arr_msets.begin(); iterate_arr_mset != arr_msets.end(); iterate_arr_mset++) { //Xapian::docid did = *iterate_arr_mset; const int unsigned& nr_rank = iterate_arr_mset.get_rank(); const int unsigned& nr_weight = iterate_arr_mset.get_weight(); const Xapian::Document doc = iterate_arr_mset.get_document(); const std::string& str_identifier = doc.get_value(0); if (map_query_results.find(str_identifier) == map_query_results.end()) { std::vector<unsigned int> arr_matches; arr_matches.push_back(query_id); const std::string& str_value = (include_value ? doc.get_data() : ""); map_query_results[str_identifier] = std::make_tuple(nr_rank, nr_weight, arr_matches, str_value); } else { type_arr_query_result& arr_query_result = map_query_results[str_identifier]; std::get<0>(arr_query_result) += nr_rank; std::get<1>(arr_query_result) += nr_weight; std::get<2>(arr_query_result).push_back(query_id); } } } }
/**
 * Turns the user's query into a Xapian::Query, optionally restricted to a
 * date range.
 *
 * @param pIndex index handed to the query parser when not NULL; the parser
 *        needs it for wildcards.
 * @param queryProps source of the free query text and of the optional minimum
 *        and maximum dates.
 * @param stemLanguage stemming language, lower-cased before being given to
 *        Xapian::Stem; when empty, stemming is switched off.
 * @param followOperators true selects OP_AND as the default operator, false
 *        selects OP_OR.
 * @return the parsed query, wrapped in an OP_FILTER with dateFilter() when a
 *         date range is set and the maximum date is later than the minimum.
 */
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, bool followOperators)
{
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	// Every field gets a defined value before it is passed by reference below
	unsigned int minDay = 0, minMonth = 0, minYear = 0;
	unsigned int maxDay = 0, maxMonth = 0, maxYear = 0;

	// Set things up
	if (stemLanguage.empty() == false)
	{
		stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
		parser.set_stemmer(stemmer);
	}
	else
	{
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	if (followOperators == true)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	if (pIndex != NULL)
	{
		// The database is required for wildcards
		parser.set_database(*pIndex);
	}
	// ...including prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_boolean_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("label", "XLABEL:");

	// Do some pre-processing : look for filters with quoted values
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Substitute the quoted value, quotes included, with the processed one
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Activate all options and parse
	Xapian::Query parsedQuery = parser.parse_query(freeQuery,
		Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
#if XAPIAN_MAJOR_VERSION==0
		Xapian::QueryParser::FLAG_WILDCARD
#else
		Xapian::QueryParser::FLAG_WILDCARD|Xapian::QueryParser::FLAG_PURE_NOT
#endif
		);

	// Apply a date range ?
	bool enableMin = queryProps.getMinimumDate(minDay, minMonth, minYear);
	bool enableMax = queryProps.getMaximumDate(maxDay, maxMonth, maxYear);
	if ((enableMin == false) &&
		(enableMax == false))
	{
		// No
		return parsedQuery;
	}

	// Anyone going as far back as Year 0 is taking the piss :-)
	if ((enableMin == false) ||
		(minYear == 0))
	{
		minDay = minMonth = 1;
		minYear = 1970;
	}
	// No usable maximum date : the range ends today
	if ((enableMax == false) ||
		(maxYear == 0))
	{
		time_t nowTime = time(NULL);
		struct tm *timeTm = localtime(&nowTime);

		maxYear = timeTm->tm_year + 1900;
		maxMonth = timeTm->tm_mon + 1;
		maxDay = timeTm->tm_mday;
	}

	string yyyymmddMin(TimeConverter::toYYYYMMDDString(minYear, minMonth, minDay));
	string yyyymmddMax(TimeConverter::toYYYYMMDDString(maxYear, maxMonth, maxDay));
	time_t startTime = TimeConverter::fromYYYYMMDDString(yyyymmddMin);
	time_t endTime = TimeConverter::fromYYYYMMDDString(yyyymmddMax);

	// Only filter if the range is not empty
	double diffTime = difftime(endTime, startTime);
	if (diffTime > 0)
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: applied date range (" << yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif
		return Xapian::Query(Xapian::Query::OP_FILTER, parsedQuery,
			dateFilter(minDay, minMonth, minYear, maxDay, maxMonth, maxYear));
	}
#ifdef DEBUG
	else cout << "XapianEngine::parseQuery: date range is zero or bogus (" << yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif

	return parsedQuery;
}
/**
 * Turns the user's query into a Xapian::Query.
 *
 * @param pIndex index handed to the query parser when not NULL; the parser
 *        needs it for wildcards.
 * @param queryProps source of the free query text; newlines in it are
 *        replaced with spaces.
 * @param stemLanguage stemming language, lower-cased before being given to
 *        Xapian::Stem; when empty, stemming is switched off.
 * @param followOperators true selects OP_AND as the default operator, false
 *        selects OP_OR.
 * @return the parsed query.
 */
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, bool followOperators)
{
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;

	// Set things up : stem only when a language was actually supplied
	if (stemLanguage.empty() == false)
	{
		stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
	}
	else
	{
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	parser.set_stemmer(stemmer);
	if (followOperators == true)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	if (pIndex != NULL)
	{
		// The database is required for wildcards
		parser.set_database(*pIndex);
	}
	// ...including prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("label", "XLABEL:");

	// Activate all options and parse
	return parser.parse_query(freeQuery,
		Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
		Xapian::QueryParser::FLAG_WILDCARD);
}
ResultIterator CollectionQuery::exec() { Xapian::Database db; try { db = Xapian::Database(QFile::encodeName(d->databaseDir).constData()); } catch (const Xapian::DatabaseError &e) { qWarning() << "Failed to open Xapian database:" << d->databaseDir << "; error:" << QString::fromStdString(e.get_error_string()); return ResultIterator(); } QList<Xapian::Query> queries; if (!d->nameString.isEmpty()) { qDebug() << "searching by name"; Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "N"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->nameString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL); } if (!d->identifierString.isEmpty()) { Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "I"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->identifierString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL); } if (!d->pathString.isEmpty()) { Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "P"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->pathString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_PHRASE); } if (!d->ns.isEmpty()) { QList<Xapian::Query> queryList; Q_FOREACH (const QString &n, d->ns) { const QByteArray term = "NS" + n.toUtf8(); queryList << Xapian::Query(term.constData()); } queries << Xapian::Query(Xapian::Query::OP_OR, queryList.begin(), queryList.end()); }
/**
 * Builds the Xapian query matching one property/value/comparator term.
 *
 * The property name is lower-cased and then handled by the first case that
 * applies:
 *  - boolean property: a single "B<prefix>" term when the value is a true
 *    bool, "BN<prefix>" otherwise;
 *  - boolean-with-value property: a single "<prefix><value>" term;
 *  - value property compared with Equal/Greater/GreaterEqual/Less/LessEqual:
 *    a value-range query on the property's value slot, using the value
 *    converted to an integer;
 *  - prefixed property compared with Contains/Equal: the value is run through
 *    a Xapian::QueryParser with the property's prefix as default prefix,
 *    allowing partial matches for Contains;
 *  - anything else: a single term made of the value's string form.
 *
 * @return the query, or an empty query when the value is null or a boolean
 *         property has no prefix.
 */
Xapian::Query PIMSearchStore::constructQuery(const QString &property, const QVariant &value, Term::Comparator com)
{
    if (value.isNull()) {
        return Xapian::Query();
    }

    QString prop = property.toLower();
    if (m_boolProperties.contains(prop)) {
        QString p = m_prefix.value(prop);
        if (p.isEmpty()) {
            return Xapian::Query();
        }

        std::string term("B");

        // Null values were rejected above, so only a genuine bool can be true
        bool isTrue = false;
        if (value.type() == QVariant::Bool) {
            isTrue = value.toBool();
        }

        if (isTrue) {
            term += p.toStdString();
        } else {
            term += 'N' + p.toStdString();
        }

        return Xapian::Query(term);
    }

    if (m_boolWithValue.contains(prop)) {
        std::string term(m_prefix.value(prop).toStdString());
        std::string val(value.toString().toUtf8().constData());
        return Xapian::Query(term + val);
    }

    if (m_valueProperties.contains(prop)
            && (com == Term::Equal || com == Term::Greater || com == Term::GreaterEqual
                || com == Term::Less || com == Term::LessEqual)) {
        qlonglong numVal = value.toLongLong();
        qDebug() << value << numVal;

        // Strict comparisons are expressed as inclusive bounds moved by one
        if (com == Term::Greater) {
            ++numVal;
        }
        if (com == Term::Less) {
            --numVal;
        }

        int valueNumber = m_valueProperties.value(prop);
        const std::string bound = QString::number(numVal).toStdString();

        if (com == Term::GreaterEqual || com == Term::Greater) {
            return Xapian::Query(Xapian::Query::OP_VALUE_GE, valueNumber, bound);
        } else if (com == Term::LessEqual || com == Term::Less) {
            return Xapian::Query(Xapian::Query::OP_VALUE_LE, valueNumber, bound);
        } else if (com == Term::Equal) {
            // Equality is the intersection of ">= bound" and "<= bound"
            const Xapian::Query gtQuery(Xapian::Query::OP_VALUE_GE, valueNumber, bound);
            const Xapian::Query ltQuery(Xapian::Query::OP_VALUE_LE, valueNumber, bound);
            return Xapian::Query(Xapian::Query::OP_AND, gtQuery, ltQuery);
        }
    } else if ((com == Term::Contains || com == Term::Equal) && m_prefix.contains(prop)) {
        Xapian::QueryParser parser;
        parser.set_database(*xapianDb());

        std::string p = m_prefix.value(prop).toStdString();
        std::string str(value.toString().toUtf8().constData());

        int flags = Xapian::QueryParser::FLAG_DEFAULT;
        if (com == Term::Contains) {
            flags |= Xapian::QueryParser::FLAG_PARTIAL;
        }

        return parser.parse_query(str, flags, p);
    }

    return Xapian::Query(value.toString().toStdString());
}
int main(int argc, char **argv) { if(argc < 2) { usage(argv); return 1; } try { char *action = argv[1]; char *db_path = argv[2]; if(!strcmp(action, "index")) { Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN); Xapian::TermGenerator indexer; Xapian::Stem stemmer("english"); indexer.set_stemmer(stemmer); std::string doc_txt; while(true) { if(std::cin.eof()) break; std::string line; getline(std::cin, line); doc_txt += line; } if(!doc_txt.empty()) { Xapian::Document doc; doc.set_data(doc_txt); indexer.set_document(doc); indexer.index_text(doc_txt); db.add_document(doc); std::cout << "Indexed: " << indexer.get_description() << std::endl; } db.commit(); } else if(!strcmp(action, "search")) { if(argc < 4) { std::cerr << "You must supply a query string" << std::endl; return 1; } Xapian::Database db(db_path); Xapian::Enquire enquire(db); std::string query_str = argv[3]; argv+= 4; while(*argv) { query_str += ' '; query_str += *argv++; } Xapian::QueryParser qp; Xapian::Stem stemmer("english"); qp.set_stemmer(stemmer); qp.set_database(db); qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); Xapian::Query query = qp.parse_query(query_str); std::cout << "Parsed query is: " << query.get_description() << std::endl; enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); std::cout << matches.get_matches_estimated() << " results found.\n"; std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" << *i << " [" << i.get_document().get_data()<< "]" << std::endl << std::endl; } } else { std::cerr << "Invalid action " << action << std::endl; usage(argv); return 1; } } catch (const Xapian::Error &error) { std::cout << "Exception: " << error.get_msg() << std::endl; } }
/**
 * Turns the user's query into a Xapian::Query.
 *
 * @param pIndex index handed to the query parser when not NULL; the parser
 *        needs it for wildcards and spelling.
 * @param queryProps source of the free query text (newlines are replaced with
 *        spaces) and of the query type.
 * @param stemLanguage stemming language, lower-cased before being given to
 *        Xapian::Stem; when empty, or when minimal is true, stemming is
 *        switched off.
 * @param defaultOperator DEFAULT_OP_AND selects OP_AND, anything else OP_OR.
 * @param limitQuery when not empty, the query becomes
 *        "limitQuery AND ( query )".
 * @param correctedFreeQuery receives the parser's corrected query string when
 *        spelling correction is compiled in and minimal is false.
 * @param minimal when true, stemming, wildcards and spelling correction are
 *        not enabled; forced to true for queries made only of CJKV text.
 * @return the parsed query; an empty query when the query is of a Xesam type
 *         and could not be parsed.
 */
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	CJKVTokenizer tokenizer;
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));

	if (tokenizer.has_cjkv_only(freeQuery) == true)
	{
		vector<string> tokens;
		string cjkvQuery;

		tokenizer.tokenize(freeQuery, tokens);

		// Get the terms
		for (vector<string>::const_iterator tokenIter = tokens.begin();
			tokenIter != tokens.end(); ++tokenIter)
		{
			cjkvQuery += *tokenIter;
			cjkvQuery += " ";
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
		// We can disable stemming and spelling correction
		minimal = true;
	}

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << stemLanguage << " stemming" << endl;
#endif
		try
		{
			stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		}
		catch (const Xapian::Error &error)
		{
			// Carry on with the default-constructed stemmer
			cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
		}
		parser.set_stemmer(stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}

	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}

	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// Date range
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		if (pParser != NULL)
		{
			bool parsedQuery = false;

			try
			{
				parsedQuery = pParser->parse(freeQuery, builder);
			}
			catch (...)
			{
				// Don't leak the parser if parsing throws
				delete pParser;
				throw;
			}

			delete pParser;

			if (parsedQuery == true)
			{
				return builder.get_query();
			}
		}

		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Substitute the quoted value, quotes included, with the processed one
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value !
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
		Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}