ResultIterator CollectionQuery::exec() { Xapian::Database db; try { db = Xapian::Database(QFile::encodeName(d->databaseDir).constData()); } catch (const Xapian::DatabaseError &e) { qWarning() << "Failed to open Xapian database:" << d->databaseDir << "; error:" << QString::fromStdString(e.get_error_string()); return ResultIterator(); } QList<Xapian::Query> queries; if (!d->nameString.isEmpty()) { qDebug() << "searching by name"; Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "N"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->nameString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL); } if (!d->identifierString.isEmpty()) { Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "I"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->identifierString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL); } if (!d->pathString.isEmpty()) { Xapian::QueryParser parser; parser.set_database(db); parser.add_prefix("", "P"); parser.set_default_op(Xapian::Query::OP_AND); queries << parser.parse_query(d->pathString.toUtf8().constData(), Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_PHRASE); } if (!d->ns.isEmpty()) { QList<Xapian::Query> queryList; Q_FOREACH (const QString &n, d->ns) { const QByteArray term = "NS" + n.toUtf8(); queryList << Xapian::Query(term.constData()); } queries << Xapian::Query(Xapian::Query::OP_OR, queryList.begin(), queryList.end()); }
/// Parses the free-text query held by queryProps into a Xapian query.
///
/// Newlines in the free query are replaced with spaces before parsing.
///
/// @param pIndex          when not NULL, handed to the parser as its database
///                        (required for wildcard expansion).
/// @param queryProps      supplies the free query text.
/// @param stemLanguage    language name, lower-cased and passed to Xapian::Stem;
///                        when empty, stemming is turned off.
/// @param followOperators true makes AND the default operator, false makes it OR.
/// @return the query produced by Xapian::QueryParser::parse_query().
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex,
    const QueryProperties &queryProps,
    const string &stemLanguage, bool followOperators)
{
    string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
    Xapian::QueryParser parser;
    Xapian::Stem stemmer;

    // Set things up
    if (stemLanguage.empty() == false)
    {
        // A language was requested: build its stemmer and actually use it.
        // (The strategies used to be swapped, so stemming was disabled
        // precisely when a language had been supplied.)
        stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
    }
    else
    {
        // No language, nothing to stem with
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
    }
    parser.set_stemmer(stemmer);

    if (followOperators == true)
    {
        parser.set_default_op(Xapian::Query::OP_AND);
    }
    else
    {
        parser.set_default_op(Xapian::Query::OP_OR);
    }

    if (pIndex != NULL)
    {
        // The database is required for wildcards
        parser.set_database(*pIndex);
    }

    // ...including prefixes
    // X prefixes should always include a colon
    parser.add_boolean_prefix("site", "H");
    parser.add_boolean_prefix("file", "P");
    parser.add_boolean_prefix("title", "S");
    parser.add_boolean_prefix("url", "U");
    parser.add_boolean_prefix("dir", "XDIR:");
    parser.add_boolean_prefix("lang", "L");
    parser.add_boolean_prefix("type", "T");
    parser.add_boolean_prefix("label", "XLABEL:");

    // Activate all options and parse
    return parser.parse_query(freeQuery,
        Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
        Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
        Xapian::QueryParser::FLAG_WILDCARD);
}
/// Turns the query described by queryProps into a Xapian query.
///
/// @param pIndex             when not NULL, set as the parser's database.
/// @param queryProps         supplies the free query, the diacritic sensitivity
///                           flag and the query type.
/// @param stemLanguage       when non-empty (and minimal is false) stemming is
///                           enabled with m_stemmer and a stopwords list is
///                           looked up for that language.
/// @param defaultOperator    DEFAULT_OP_AND selects AND, anything else OR.
/// @param limitQuery         when non-empty, the free query is rewritten as
///                           "<limitQuery> AND ( <free query> )".
/// @param correctedFreeQuery receives the parser's corrected query string, only
///                           when minimal is false and spelling correction is
///                           compiled in.
/// @param minimal            true disables stemming, wildcards and spelling
///                           correction.
/// @return the parsed query; for Xesam query types, the builder's query when
///         parsing succeeded and an empty query otherwise.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex,
    const QueryProperties &queryProps,
    const string &stemLanguage, DefaultOperator defaultOperator,
    const string &limitQuery, string &correctedFreeQuery,
    bool minimal)
{
    Xapian::QueryParser parser;
    CJKVTokenizer tokenizer;
    string freeQuery(queryProps.getFreeQuery());
    unsigned int tokensCount = 1;
    bool diacriticSensitive = queryProps.getDiacriticSensitive();

    // The query has to be rewritten if it holds CJKV text or if diacritics are ignored
    const bool rewriteQuery = (tokenizer.has_cjkv(freeQuery) == true) || !diacriticSensitive;
    if (rewriteQuery)
    {
        QueryModifier handler(freeQuery, diacriticSensitive, tokenizer.get_ngram_size());

        tokenizer.tokenize(freeQuery, handler, true);
        tokensCount = handler.get_tokens_count();

        // Stemming and spelling correction can be disabled for pure CJKV queries
        string cjkvQuery(handler.get_modified_query(minimal));
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

        // Carry on as if this was what the user typed
        freeQuery = cjkvQuery;
    }
    else
    {
        // Count one extra token per space, stopping at a trailing space
        for (string::size_type pos = freeQuery.find(' ');
            pos != string::npos;
            pos = freeQuery.find(' ', pos + 1))
        {
            ++tokensCount;

            if (pos + 1 >= freeQuery.length())
            {
                break;
            }
        }
    }
#ifdef DEBUG
    cout << "XapianEngine::parseQuery: " << tokensCount << " tokens" << endl;
#endif

    // Wildcards and spelling need the database
    if (pIndex != NULL)
    {
        parser.set_database(*pIndex);
    }

    // Stemming setup
    const bool wantStemming = !minimal && !stemLanguage.empty();
    if (!wantStemming)
    {
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
    }
    else
    {
        parser.set_stemmer(m_stemmer);
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

        // A single token doesn't warrant loading the stopwords list
        if (tokensCount > 1)
        {
            FileStopper *pStopper = FileStopper::get_stopper(Languages::toCode(stemLanguage));
            if ((pStopper != NULL) &&
                (pStopper->get_stopwords_count() > 0))
            {
                parser.set_stopper(pStopper);
            }
        }
    }

    // Default operator
    parser.set_default_op((defaultOperator == DEFAULT_OP_AND) ?
        Xapian::Query::OP_AND : Xapian::Query::OP_OR);

#if XAPIAN_NUM_VERSION >= 1000004
    // Search across text body and title
    parser.add_prefix("", "");
    parser.add_prefix("", "S");
#endif
    // X prefixes should always include a colon
    parser.add_boolean_prefix("site", "H");
    parser.add_boolean_prefix("file", "P");
    parser.add_boolean_prefix("ext", "XEXT:");
    parser.add_prefix("title", "S");
    parser.add_boolean_prefix("url", "U");
    parser.add_boolean_prefix("dir", "XDIR:");
    parser.add_boolean_prefix("inurl", "XFILE:");
    parser.add_prefix("path", "XPATH:");
    parser.add_boolean_prefix("lang", "L");
    parser.add_boolean_prefix("type", "T");
    parser.add_boolean_prefix("class", "XCLASS:");
    parser.add_boolean_prefix("label", "XLABEL:");
    parser.add_boolean_prefix("tokens", "XTOK:");

    // Restrict the search to a subset of documents ?
    if (!limitQuery.empty())
    {
        string limitedQuery(limitQuery);

        limitedQuery.append(" AND ( ");
        limitedQuery.append(freeQuery);
        limitedQuery.append(" )");
        freeQuery = limitedQuery;
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
    }

    // The range processors below must stay alive until the query has been parsed

    // Date range
    Xapian::DateValueRangeProcessor dateProcessor(0);
    parser.add_valuerangeprocessor(&dateProcessor);

    // Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
    Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
    parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
    // Xapian 1.02 is the bare minimum
    Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
    parser.add_valuerangeprocessor(&sizeProcessor);
#endif

    // Time range
    TimeValueRangeProcessor timeProcessor(3);
    parser.add_valuerangeprocessor(&timeProcessor);

    // Anything that isn't a Xapian QueryParser query goes through a Xesam parser
    QueryProperties::QueryType type = queryProps.getType();
    if (type != QueryProperties::XAPIAN_QP)
    {
        map<string, string> fieldMapping;

        // Bare minimum mapping between Xesam fields and our prefixes
        fieldMapping["dc:title"] = "S";

        XapianQueryBuilder builder(parser, fieldMapping);
        XesamParser *pParser = NULL;

        // Pick the Xesam parser that matches the query type
        if (type == QueryProperties::XESAM_QL)
        {
            pParser = new XesamQLParser();
        }
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
        else if (type == QueryProperties::XESAM_UL)
        {
            pParser = new XesamULParser();
        }
#endif

        if (pParser == NULL)
        {
            // No parser for this type
            return Xapian::Query();
        }

        const bool parsedOk = pParser->parse(freeQuery, builder);
        delete pParser;

        if (parsedOk)
        {
            return builder.get_query();
        }

        return Xapian::Query();
    }

    // Pre-processing : rewrite filters whose value is quoted, ie prefix:"value"
    string::size_type closingQuote = 0;
    string::size_type filterColon = freeQuery.find(":\"");
    while ((filterColon != string::npos) &&
        (filterColon < freeQuery.length() - 2))
    {
        closingQuote = freeQuery.find("\"", filterColon + 2);
        if (closingQuote == string::npos)
        {
            // Unterminated quote, leave the rest alone
            break;
        }

        string rawValue(freeQuery.substr(filterColon + 2, closingQuote - filterColon - 2));
        if (rawValue.empty())
        {
            // No value ! Collapse :"" into a lone colon
            freeQuery.replace(filterColon, closingQuote - filterColon + 1, ":");
            closingQuote -= 2;
        }
        else
        {
            string escapedValue(Url::escapeUrl(rawValue));
            bool escapeValue = false, hashValue = false;

            // The value should be escaped and length-limited as done at indexing time
            checkFilter(freeQuery, filterColon, escapeValue, hashValue);

            if (!escapeValue)
            {
                // This filter's values aren't escaped
                escapedValue = rawValue;
            }
            if (hashValue)
            {
                // Partially hash if necessary
                escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
            }
            else
            {
                escapedValue = XapianDatabase::limitTermLength(escapedValue);
            }

            // Swap the quoted value, quotes included, for the processed one
            freeQuery.replace(filterColon + 1, closingQuote - filterColon, escapedValue);
            closingQuote = closingQuote + escapedValue.length() - rawValue.length();
        }
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

        // Next
        filterColon = freeQuery.find(":\"", closingQuote);
    }

    // Parse the query string with all necessary options
    const bool fullParse = !minimal;
    unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
        Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PURE_NOT;
    if (fullParse)
    {
        flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
        flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
    }

    Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
    cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

    if (fullParse)
    {
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
        // Any correction ?
        correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
        if (correctedFreeQuery.empty() == false)
        {
            cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
        }
#endif
#endif
    }

    return parsedQuery;
}
/// Parses the free query of queryProps and optionally restricts the result
/// to the date range the properties ask for.
///
/// @param pIndex          when not NULL, set as the parser's database.
/// @param queryProps      supplies the free query (newlines become spaces) and
///                        the minimum/maximum dates.
/// @param stemLanguage    when non-empty, a stemmer for that language
///                        (lower-cased) is used with STEM_ALL; otherwise
///                        stemming is off.
/// @param followOperators true makes AND the default operator, false makes it OR.
/// @return the parsed query, wrapped in an OP_FILTER with dateFilter() when a
///         date range is enabled and spans a positive amount of time.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex,
    const QueryProperties &queryProps,
    const string &stemLanguage, bool followOperators)
{
    string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
    Xapian::QueryParser parser;
    Xapian::Stem stemmer;
    unsigned int minDay;
    unsigned int minMonth;
    unsigned int minYear = 0;
    unsigned int maxDay;
    unsigned int maxMonth;
    unsigned int maxYear = 0;

    // Stemming is only possible when a language was given
    if (stemLanguage.empty())
    {
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
    }
    else
    {
        stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
        parser.set_stemmer(stemmer);
    }

    parser.set_default_op(followOperators ?
        Xapian::Query::OP_AND : Xapian::Query::OP_OR);

    // Wildcards need the database
    if (pIndex != NULL)
    {
        parser.set_database(*pIndex);
    }

    // ...including prefixes
    // X prefixes should always include a colon
    parser.add_boolean_prefix("site", "H");
    parser.add_boolean_prefix("file", "P");
    parser.add_boolean_prefix("ext", "XEXT:");
    parser.add_boolean_prefix("title", "S");
    parser.add_boolean_prefix("url", "U");
    parser.add_boolean_prefix("dir", "XDIR:");
    parser.add_boolean_prefix("lang", "L");
    parser.add_boolean_prefix("type", "T");
    parser.add_boolean_prefix("label", "XLABEL:");

    // Pre-processing : rewrite filters whose value is quoted, ie prefix:"value"
    string::size_type closingQuote = 0;
    string::size_type filterColon = freeQuery.find(":\"");
    while ((filterColon != string::npos) &&
        (filterColon < freeQuery.length() - 2))
    {
        closingQuote = freeQuery.find("\"", filterColon + 2);
        if (closingQuote == string::npos)
        {
            // Unterminated quote, leave the rest alone
            break;
        }

        string rawValue(freeQuery.substr(filterColon + 2, closingQuote - filterColon - 2));
        if (rawValue.empty())
        {
            // No value ! Collapse :"" into a lone colon
            freeQuery.replace(filterColon, closingQuote - filterColon + 1, ":");
            closingQuote -= 2;
        }
        else
        {
            string escapedValue(Url::escapeUrl(rawValue));
            bool escapeValue = false, hashValue = false;

            // The value should be escaped and length-limited as done at indexing time
            checkFilter(freeQuery, filterColon, escapeValue, hashValue);

            if (!escapeValue)
            {
                // This filter's values aren't escaped
                escapedValue = rawValue;
            }
            if (hashValue)
            {
                // Partially hash if necessary
                escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
            }
            else
            {
                escapedValue = XapianDatabase::limitTermLength(escapedValue);
            }

            // Swap the quoted value, quotes included, for the processed one
            freeQuery.replace(filterColon + 1, closingQuote - filterColon, escapedValue);
            closingQuote = closingQuote + escapedValue.length() - rawValue.length();
        }
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

        // Next
        filterColon = freeQuery.find(":\"", closingQuote);
    }

    // Activate all options and parse
    unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
        Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
        Xapian::QueryParser::FLAG_WILDCARD;
#if XAPIAN_MAJOR_VERSION==0
    // Pure NOT queries aren't requested with this major version
#else
    flags |= Xapian::QueryParser::FLAG_PURE_NOT;
#endif
    Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);

    // Apply a date range ?
    bool enableMin = queryProps.getMinimumDate(minDay, minMonth, minYear);
    bool enableMax = queryProps.getMaximumDate(maxDay, maxMonth, maxYear);
    if (!enableMin && !enableMax)
    {
        // No
        return parsedQuery;
    }

    // Anyone going as far back as Year 0 is taking the piss :-)
    if (!enableMin || (minYear == 0))
    {
        minDay = 1;
        minMonth = 1;
        minYear = 1970;
    }
    // Without a usable maximum date, the range ends today
    if (!enableMax || (maxYear == 0))
    {
        time_t nowTime = time(NULL);
        struct tm *timeTm = localtime(&nowTime);

        maxYear = timeTm->tm_year + 1900;
        maxMonth = timeTm->tm_mon + 1;
        maxDay = timeTm->tm_mday;
    }

    string yyyymmddMin(TimeConverter::toYYYYMMDDString(minYear, minMonth, minDay));
    string yyyymmddMax(TimeConverter::toYYYYMMDDString(maxYear, maxMonth, maxDay));
    time_t startTime = TimeConverter::fromYYYYMMDDString(yyyymmddMin);
    time_t endTime = TimeConverter::fromYYYYMMDDString(yyyymmddMax);

    double rangeSpan = difftime(endTime, startTime);
    if (!(rangeSpan > 0))
    {
        // Nothing sensible to filter on
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: date range is zero or bogus ("
            << yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif
        return parsedQuery;
    }

#ifdef DEBUG
    cout << "XapianEngine::parseQuery: applied date range ("
        << yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif
    return Xapian::Query(Xapian::Query::OP_FILTER, parsedQuery,
        dateFilter(minDay, minMonth, minYear, maxDay, maxMonth, maxYear));
}
/// Turns the query described by queryProps into a Xapian query.
///
/// Newlines in the free query are replaced with spaces. A query made only of
/// CJKV text is replaced by its space-separated tokens and parsed as if
/// minimal had been requested.
///
/// @param pIndex             when not NULL, set as the parser's database
///                           (needed for wildcards and spelling).
/// @param queryProps         supplies the free query and the query type.
/// @param stemLanguage       when non-empty (and minimal is false), a stemmer
///                           for that language (lower-cased) is used with
///                           STEM_SOME; a failure to create it is reported on
///                           cerr and parsing carries on.
/// @param defaultOperator    DEFAULT_OP_AND selects AND, anything else OR.
/// @param limitQuery         when non-empty, the free query is rewritten as
///                           "<limitQuery> AND ( <free query> )".
/// @param correctedFreeQuery receives the parser's corrected query string, only
///                           when minimal is false and spelling correction is
///                           compiled in.
/// @param minimal            true disables stemming, wildcards and spelling
///                           correction.
/// @return the parsed query; for Xesam query types, the builder's query when
///         parsing succeeded and an empty query otherwise.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex,
    const QueryProperties &queryProps,
    const string &stemLanguage, DefaultOperator defaultOperator,
    const string &limitQuery, string &correctedFreeQuery,
    bool minimal)
{
    Xapian::QueryParser parser;
    Xapian::Stem stemmer;
    CJKVTokenizer tokenizer;
    string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));

    if (tokenizer.has_cjkv_only(freeQuery) == true)
    {
        vector<string> tokens;
        string cjkvQuery;

        tokenizer.tokenize(freeQuery, tokens);

        // Get the terms
        for (vector<string>::const_iterator tokenIter = tokens.begin();
            tokenIter != tokens.end(); ++tokenIter)
        {
            cjkvQuery += *tokenIter;
            cjkvQuery += " ";
        }
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

        // Do as if the user had given this as input
        freeQuery = cjkvQuery;
        // We can disable stemming and spelling correction
        minimal = true;
    }

    if (pIndex != NULL)
    {
        // The database is required for wildcards and spelling
        parser.set_database(*pIndex);
    }

    // Set things up
    if ((minimal == false) &&
        (stemLanguage.empty() == false))
    {
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: " << stemLanguage << " stemming" << endl;
#endif
        try
        {
            stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
        }
        catch (const Xapian::Error &error)
        {
            // Carry on with the default-constructed stemmer
            cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
        }
        parser.set_stemmer(stemmer);
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
    }
    else
    {
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
        parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
    }

    // What's the default operator ?
    if (defaultOperator == DEFAULT_OP_AND)
    {
        parser.set_default_op(Xapian::Query::OP_AND);
    }
    else
    {
        parser.set_default_op(Xapian::Query::OP_OR);
    }

    // X prefixes should always include a colon
    parser.add_boolean_prefix("site", "H");
    parser.add_boolean_prefix("file", "P");
    parser.add_boolean_prefix("ext", "XEXT:");
    parser.add_prefix("title", "S");
    parser.add_boolean_prefix("url", "U");
    parser.add_boolean_prefix("dir", "XDIR:");
    parser.add_boolean_prefix("lang", "L");
    parser.add_boolean_prefix("type", "T");
    parser.add_boolean_prefix("class", "XCLASS:");
    parser.add_boolean_prefix("label", "XLABEL:");
    parser.add_boolean_prefix("tokens", "XTOK:");

    // Any limit on what documents should be searched ?
    if (limitQuery.empty() == false)
    {
        string limitedQuery(limitQuery);

        limitedQuery += " AND ( ";
        limitedQuery += freeQuery;
        limitedQuery += " )";
        freeQuery = limitedQuery;
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
    }

    // The parser is only given pointers to the range processors below,
    // so they are kept in function scope until parsing is done

    // Date range
    Xapian::DateValueRangeProcessor dateProcessor(0);
    parser.add_valuerangeprocessor(&dateProcessor);

    // Size with a "b" suffix, ie 1024..10240b
#if XAPIAN_NUM_VERSION >= 1001000
    Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
    parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
    // Xapian 1.02 is the bare minimum
    Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
    parser.add_valuerangeprocessor(&sizeProcessor);
#endif

    // Time range
    TimeValueRangeProcessor timeProcessor(3);
    parser.add_valuerangeprocessor(&timeProcessor);

    // What type of query is this ?
    QueryProperties::QueryType type = queryProps.getType();
    if (type != QueryProperties::XAPIAN_QP)
    {
        map<string, string> fieldMapping;

        // Bare minimum mapping between Xesam fields and our prefixes
        fieldMapping["dc:title"] = "S";

        XapianQueryBuilder builder(parser, fieldMapping);
        XesamParser *pParser = NULL;

        // Get a Xesam parser
        if (type == QueryProperties::XESAM_QL)
        {
            pParser = new XesamQLParser();
        }
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
        else if (type == QueryProperties::XESAM_UL)
        {
            pParser = new XesamULParser();
        }
#endif

        if (pParser != NULL)
        {
            bool parsedQuery = pParser->parse(freeQuery, builder);

            delete pParser;

            if (parsedQuery == true)
            {
                return builder.get_query();
            }
        }

        // No parser for this type, or parsing failed
        return Xapian::Query();
    }

    // Do some pre-processing : look for filters with quoted values
    string::size_type escapedFilterEnd = 0;
    string::size_type escapedFilterStart = freeQuery.find(":\"");
    while ((escapedFilterStart != string::npos) &&
        (escapedFilterStart < freeQuery.length() - 2))
    {
        escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
        if (escapedFilterEnd == string::npos)
        {
            // Unterminated quote, leave the rest of the query untouched
            break;
        }

        string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
        if (filterValue.empty() == false)
        {
            string escapedValue(Url::escapeUrl(filterValue));
            bool escapeValue = false, hashValue = false;

            // The value should be escaped and length-limited as done at indexing time
            checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

            if (escapeValue == false)
            {
                // No escaping
                escapedValue = filterValue;
            }
            if (hashValue == true)
            {
                // Partially hash if necessary
                escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
            }
            else
            {
                escapedValue = XapianDatabase::limitTermLength(escapedValue);
            }

            // Replace the quoted value, both quotes included, with the processed one
            freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart, escapedValue);
            escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
        }
        else
        {
            // No value !
            freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
            escapedFilterEnd -= 2;
        }
#ifdef DEBUG
        cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

        // Next
        escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
    }

    // Parse the query string with all necessary options
    unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
        Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
        Xapian::QueryParser::FLAG_PURE_NOT;
    if (minimal == false)
    {
        flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
        flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
    }
    Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
    cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

    if (minimal == false)
    {
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
        // Any correction ?
        correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
        if (correctedFreeQuery.empty() == false)
        {
            cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
        }
#endif
#endif
    }

    return parsedQuery;
}