// Builds a Xapian query from the given query properties.
//
// pIndex             - optional database; when not NULL it is handed to the query parser
// queryProps         - supplies the free query text, the diacritic sensitivity and the query type
// stemLanguage       - when not empty (and minimal is false) stemming is enabled with m_stemmer
//                      and, for multi-token queries, a stopwords list is looked up for this language
// defaultOperator    - DEFAULT_OP_AND selects OP_AND, anything else OP_OR
// limitQuery         - when not empty, the query parsed is "limitQuery AND ( freeQuery )"
// correctedFreeQuery - receives the parser's corrected query string, only if minimal is false and
//                      ENABLE_XAPIAN_SPELLING_CORRECTION>0; left untouched otherwise
// minimal            - when true, stemming, wildcards and spelling correction are not requested
//
// Returns the parsed query. For query types other than XAPIAN_QP, returns what the Xesam
// builder produced, or an empty query if no parser is available or parsing failed.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	CJKVTokenizer tokenizer;
	string freeQuery(queryProps.getFreeQuery());
	// Starts at 1 : a query without any space is counted as a single token
	unsigned int tokensCount = 1;
	bool diacriticSensitive = queryProps.getDiacriticSensitive();

	// Modifying the query is necessary if it's CJKV or diacritics are off
	if ((tokenizer.has_cjkv(freeQuery) == true) ||
		(diacriticSensitive == false))
	{
		QueryModifier handler(freeQuery,
			diacriticSensitive,
			tokenizer.get_ngram_size());

		// The handler is fed the tokens and builds the modified query as it goes
		tokenizer.tokenize(freeQuery, handler, true);

		tokensCount = handler.get_tokens_count();

		// NOTE(review): the original comment said stemming and spelling correction can be
		// disabled for pure CJKV queries, but nothing in this branch turns them off
		string cjkvQuery(handler.get_modified_query(minimal));
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
	}
	else
	{
		// Estimate the number of tokens by counting spaces
		string::size_type spacePos = freeQuery.find(' ');
		while (spacePos != string::npos)
		{
			++tokensCount;

			// Stop if this space is the last character
			if (spacePos + 1 >= freeQuery.length())
			{
				break;
			}

			// Next
			spacePos = freeQuery.find(' ', spacePos + 1);
		}
	}
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << tokensCount << " tokens" << endl;
#endif

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
		parser.set_stemmer(m_stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

		// Don't bother loading the stopwords list if there's only one token
		if (tokensCount > 1)
		{
			FileStopper *pStopper = FileStopper::get_stopper(Languages::toCode(stemLanguage));
			// Only install the stopper if it actually holds stopwords
			if ((pStopper != NULL) &&
				(pStopper->get_stopwords_count() > 0))
			{
				parser.set_stopper(pStopper);
			}
		}
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
#if XAPIAN_NUM_VERSION >= 1000004
	// Search across text body and title : the empty field maps to both the empty and S prefixes
	parser.add_prefix("", "");
	parser.add_prefix("", "S");
#endif
	// Map the filter names users can type to term prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("inurl", "XFILE:");
	parser.add_prefix("path", "XPATH:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		// The user's query is AND'ed with the limit, inside parentheses
		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// The range processors below are registered by address, so presumably they have to
	// stay alive until the query has been parsed - keep them in this scope

	// Date range, on value 0
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b, on value 2
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range, on value 3
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;
		bool parsedQuery = false;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		// pParser stays NULL if the type isn't supported in this build
		if (pParser != NULL)
		{
			parsedQuery = pParser->parse(freeQuery, builder);

			delete pParser;
		}

		if (parsedQuery == true)
		{
			return builder.get_query();
		}

		// No parser, or parsing failed : return an empty query
		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values, ie name:"value",
	// and replace them with name:value where value was escaped and/or length-limited
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		// Find the closing quote
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		// What's between the quotes
		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Replace everything from the opening to the closing quote, both included
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart,
				escapedValue);
			// Shift the end position by the difference in value length
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value ! Collapse :"" into a single colon
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		// Wildcards and spelling correction are only requested for non-minimal queries
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}
			void query(const std::vector<std::string>& arr_queries, const std::vector<std::string>& arr_selection = {}) {
				
				Xapian::Database databases_ir;
				try {
					Xapian::Database database_ir_object_values(path_database+"object_values/");
					databases_ir.add_database(database_ir_object_values);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_descriptions(path_database+"object_descriptions/");
					databases_ir.add_database(database_ir_object_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_sub_descriptions(path_database+"object_sub_descriptions/");
					databases_ir.add_database(database_ir_object_sub_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				
				// Filter on Type IDs
				
				Xapian::Query query_ir_identifiers;
				
				if (!arr_selection.empty()) {
					
					std::vector<Xapian::Query> arr_query_identifiers;
					
					for (const auto& str_identifier_field : arr_selection) {

						arr_query_identifiers.push_back(Xapian::Query("T"+str_identifier_field));
					}
					
					query_ir_identifiers = Xapian::Query(Xapian::Query::OP_OR, arr_query_identifiers.begin(), arr_query_identifiers.end());
				}

				Xapian::QueryParser queryparser;
				queryparser.set_database(databases_ir); // Needed to enable specific query flags
				queryparser.set_stemmer(Xapian::Stem("en"));
				queryparser.set_stemming_strategy(queryparser.STEM_SOME);
				queryparser.add_boolean_prefix("identifier", "T");
				//queryparser.add_prefix("value", "SV");
				
				unsigned int count_queries = 0;
				
				for (const auto& str_query : arr_queries) {
					
					const auto query_id = count_queries;
					count_queries++;
						
					Xapian::Query query_ir = queryparser.parse_query(str_query, Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_WILDCARD);
					
					if (!arr_selection.empty()) {
						
						// Update main query
						
						query_ir = Xapian::Query(Xapian::Query::OP_FILTER, query_ir, query_ir_identifiers);
					}
					
					// Run query
					
					Xapian::Enquire enquire(databases_ir);
					enquire.set_query(query_ir);
					
					Xapian::MSet arr_msets = enquire.get_mset(num_offset, num_limit);

					for (Xapian::MSetIterator iterate_arr_mset = arr_msets.begin(); iterate_arr_mset != arr_msets.end(); iterate_arr_mset++) {
											
						//Xapian::docid did = *iterate_arr_mset;
						const int unsigned& nr_rank = iterate_arr_mset.get_rank();
						const int unsigned& nr_weight = iterate_arr_mset.get_weight();
						
						const Xapian::Document doc = iterate_arr_mset.get_document();
						const std::string& str_identifier = doc.get_value(0);
						
						if (map_query_results.find(str_identifier) == map_query_results.end()) {
							
							std::vector<unsigned int> arr_matches;
							
							arr_matches.push_back(query_id);

							const std::string& str_value = (include_value ? doc.get_data() : "");
							
							map_query_results[str_identifier] = std::make_tuple(nr_rank, nr_weight, arr_matches, str_value);
						} else {
							
							type_arr_query_result& arr_query_result = map_query_results[str_identifier];
							
							std::get<0>(arr_query_result) += nr_rank;
							std::get<1>(arr_query_result) += nr_weight;
							
							std::get<2>(arr_query_result).push_back(query_id);
						}
					}
				}
			}
// Builds a Xapian query from the given query properties, optionally restricted to a date range.
//
// pIndex          - optional database; when not NULL it is handed to the query parser
// queryProps      - supplies the free query text and the minimum/maximum dates
// stemLanguage    - when not empty, a stemmer for that language is used with STEM_ALL
// followOperators - true selects OP_AND as the default operator, false OP_OR
//
// Returns the parsed query, filtered by date when a minimum and/or maximum date is enabled
// and the resulting range is strictly positive.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, bool followOperators)
{
	// Newlines are turned into spaces so that the query is on a single line
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	// Initialise every field : "a, b, c = 0" would only initialise c
	unsigned int minDay = 0, minMonth = 0, minYear = 0;
	unsigned int maxDay = 0, maxMonth = 0, maxYear = 0;

	// Set things up
	if (stemLanguage.empty() == false)
	{
		stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
		parser.set_stemmer(stemmer);
	}
	else
	{
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	if (followOperators == true)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	if (pIndex != NULL)
	{
		// The database is required for wildcards
		parser.set_database(*pIndex);
	}
	// ...including prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_boolean_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("label", "XLABEL:");

	// Do some pre-processing : look for filters with quoted values, ie name:"value",
	// and replace them with name:value where value was escaped and/or length-limited
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		// Find the closing quote
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Replace everything from the opening to the closing quote, both included
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart,
				escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value ! Collapse :"" into a single colon
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Activate all options and parse
	Xapian::Query parsedQuery = parser.parse_query(freeQuery,
		Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
#if XAPIAN_MAJOR_VERSION==0
		Xapian::QueryParser::FLAG_WILDCARD
#else
		Xapian::QueryParser::FLAG_WILDCARD|Xapian::QueryParser::FLAG_PURE_NOT
#endif
		);
	// Apply a date range ?
	bool enableMin = queryProps.getMinimumDate(minDay, minMonth, minYear);
	bool enableMax = queryProps.getMaximumDate(maxDay, maxMonth, maxYear);
	if ((enableMin == false) && 
		(enableMax == false))
	{
		// No
		return parsedQuery;
	}

	// No usable minimum date : start from January 1st 1970
	// Anyone going as far back as Year 0 is taking the piss :-)
	if ((enableMin == false) ||
		(minYear == 0))
	{
		minDay = minMonth = 1;
		minYear = 1970;
	}
	// No usable maximum date : end today
	if ((enableMax == false) ||
		(maxYear == 0))
	{
		time_t nowTime = time(NULL);
		struct tm *timeTm = localtime(&nowTime);
		if (timeTm == NULL)
		{
			// Today's date can't be determined, so don't filter by date at all
			return parsedQuery;
		}
		maxYear = timeTm->tm_year + 1900;
		maxMonth = timeTm->tm_mon + 1;
		maxDay = timeTm->tm_mday;
	}

	string yyyymmddMin(TimeConverter::toYYYYMMDDString(minYear, minMonth, minDay));
	string yyyymmddMax(TimeConverter::toYYYYMMDDString(maxYear, maxMonth, maxDay));
	time_t startTime = TimeConverter::fromYYYYMMDDString(yyyymmddMin);
	time_t endTime = TimeConverter::fromYYYYMMDDString(yyyymmddMax);
 
	// Only filter if the maximum date is strictly after the minimum date
	double diffTime = difftime(endTime, startTime);
	if (diffTime > 0)
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: applied date range ("
			<< yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif
		return Xapian::Query(Xapian::Query::OP_FILTER, parsedQuery,
			dateFilter(minDay, minMonth, minYear, maxDay, maxMonth, maxYear));
	}
#ifdef DEBUG
	else cout << "XapianEngine::parseQuery: date range is zero or bogus ("
		<< yyyymmddMax << " <= " << yyyymmddMin << ")" << endl;
#endif

	return parsedQuery;
}
// Builds a Xapian query from the free query text held by queryProps.
//
// pIndex          - optional database; when not NULL it is handed to the query parser
// queryProps      - supplies the free query text
// stemLanguage    - when not empty, a stemmer for that language is applied to all terms;
//                   when empty, no stemming is done
// followOperators - true selects OP_AND as the default operator, false OP_OR
//
// Returns the parsed query.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, bool followOperators)
{
	// Newlines are turned into spaces so that the query is on a single line
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;

	// Set things up
	// Stem only when a language was given : the two strategies used to be swapped,
	// which meant the language-specific stemmer was never applied
	if (stemLanguage.empty() == false)
	{
		stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
	}
	else
	{
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	parser.set_stemmer(stemmer);
	if (followOperators == true)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	if (pIndex != NULL)
	{
		// The database is required for wildcards
		parser.set_database(*pIndex);
	}
	// ...including prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("label", "XLABEL:");

	// Activate all options and parse
	return parser.parse_query(freeQuery,
		Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|Xapian::QueryParser::FLAG_WILDCARD);
}
Beispiel #5
0
ResultIterator CollectionQuery::exec()
{
    Xapian::Database db;
    try {
        db = Xapian::Database(QFile::encodeName(d->databaseDir).constData());
    } catch (const Xapian::DatabaseError &e) {
        qWarning() << "Failed to open Xapian database:" << d->databaseDir
                   << "; error:" << QString::fromStdString(e.get_error_string());
        return ResultIterator();
    }

    QList<Xapian::Query> queries;

    if (!d->nameString.isEmpty()) {
        qDebug() << "searching by name";
        Xapian::QueryParser parser;
        parser.set_database(db);
        parser.add_prefix("", "N");
        parser.set_default_op(Xapian::Query::OP_AND);
        queries << parser.parse_query(d->nameString.toUtf8().constData(),
                                      Xapian::QueryParser::FLAG_PARTIAL);
    }

    if (!d->identifierString.isEmpty()) {
        Xapian::QueryParser parser;
        parser.set_database(db);
        parser.add_prefix("", "I");
        parser.set_default_op(Xapian::Query::OP_AND);
        queries << parser.parse_query(d->identifierString.toUtf8().constData(),
                                      Xapian::QueryParser::FLAG_PARTIAL);
    }

    if (!d->pathString.isEmpty()) {
        Xapian::QueryParser parser;
        parser.set_database(db);
        parser.add_prefix("", "P");
        parser.set_default_op(Xapian::Query::OP_AND);
        queries << parser.parse_query(d->pathString.toUtf8().constData(),
                                      Xapian::QueryParser::FLAG_PARTIAL | Xapian::QueryParser::FLAG_PHRASE);
    }

    if (!d->ns.isEmpty()) {
        QList<Xapian::Query> queryList;
        Q_FOREACH (const QString &n, d->ns) {
            const QByteArray term = "NS" + n.toUtf8();
            queryList << Xapian::Query(term.constData());
        }
        queries << Xapian::Query(Xapian::Query::OP_OR, queryList.begin(), queryList.end());
    }
Beispiel #6
0
/**
 * Builds the Xapian query matching \p property against \p value.
 *
 * \param property property name; it is lower-cased before any lookup
 * \param value    value to match; a null value yields an empty query
 * \param com      comparator to apply
 *
 * The property is handled, in this order, as:
 *  - a boolean property: term "B" + prefix when \p value is a boolean true,
 *    "BN" + prefix otherwise (empty query if the property has no prefix);
 *  - a boolean-with-value property: term made of the prefix followed by the value;
 *  - a value property with a range/equality comparator: a value range query;
 *  - a prefixed property with Contains/Equal: the value parsed with that prefix,
 *    allowing partial matches for Contains;
 *  - anything else: a single term made of the value itself.
 */
Xapian::Query PIMSearchStore::constructQuery(const QString &property, const QVariant &value,
        Term::Comparator com)
{
    if (value.isNull()) {
        return Xapian::Query();
    }

    const QString prop = property.toLower();
    if (m_boolProperties.contains(prop)) {
        const QString p = m_prefix.value(prop);
        if (p.isEmpty()) {
            return Xapian::Query();
        }

        // value can't be null here (see the early return above), so only an
        // explicit boolean true selects the positive term
        const bool isTrue = (value.type() == QVariant::Bool) && value.toBool();

        std::string term("B");
        if (!isTrue) {
            term += 'N';
        }
        term += p.toStdString();

        return Xapian::Query(term);
    }

    if (m_boolWithValue.contains(prop)) {
        std::string term(m_prefix.value(prop).toStdString());
        std::string val(value.toString().toUtf8().constData());
        return Xapian::Query(term + val);
    }

    const bool isRangeComparator = (com == Term::Equal || com == Term::Greater || com == Term::GreaterEqual
                                    || com == Term::Less || com == Term::LessEqual);
    if (m_valueProperties.contains(prop) && isRangeComparator) {
        qlonglong numVal = value.toLongLong();
        qDebug() << value << numVal;

        // Strict comparisons are expressed as inclusive ones on the neighbouring integer
        if (com == Term::Greater) {
            ++numVal;
        }
        if (com == Term::Less) {
            --numVal;
        }

        const int valueNumber = m_valueProperties.value(prop);
        const std::string numStr = QString::number(numVal).toStdString();

        if (com == Term::GreaterEqual || com == Term::Greater) {
            return Xapian::Query(Xapian::Query::OP_VALUE_GE, valueNumber, numStr);
        }
        if (com == Term::LessEqual || com == Term::Less) {
            return Xapian::Query(Xapian::Query::OP_VALUE_LE, valueNumber, numStr);
        }

        // Only Term::Equal is left: value >= n AND value <= n
        const Xapian::Query gtQuery(Xapian::Query::OP_VALUE_GE, valueNumber, numStr);
        const Xapian::Query ltQuery(Xapian::Query::OP_VALUE_LE, valueNumber, numStr);
        return Xapian::Query(Xapian::Query::OP_AND, gtQuery, ltQuery);
    }

    if ((com == Term::Contains || com == Term::Equal) && m_prefix.contains(prop)) {
        Xapian::QueryParser parser;
        parser.set_database(*xapianDb());

        std::string p = m_prefix.value(prop).toStdString();
        std::string str(value.toString().toUtf8().constData());
        int flags = Xapian::QueryParser::FLAG_DEFAULT;
        if (com == Term::Contains) {
            flags |= Xapian::QueryParser::FLAG_PARTIAL;
        }
        return parser.parse_query(str, flags, p);
    }

    return Xapian::Query(value.toString().toStdString());
}
Beispiel #7
0
// Small command-line front-end:
//   <prog> index  <db_path>               indexes the text read from standard input as one document
//   <prog> search <db_path> <query...>    runs the query and prints up to 10 matches
// Returns 0 on success, 1 on a usage error or when Xapian reports an error.
int main(int argc, char **argv)
{
    // Both the action and the database path are mandatory: with only one
    // argument, argv[2] would be the terminating null pointer
    if(argc < 3) {
        usage(argv);
        return 1;
    }

    try {
        const char *action = argv[1];
        const char *db_path = argv[2];

        if(!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);

            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            // Read the whole of standard input, keeping the line breaks so that
            // the last word of a line isn't glued to the first word of the next
            std::string doc_txt;
            std::string line;
            while(std::getline(std::cin, line)) {
                doc_txt += line;
                doc_txt += '\n';
            }

            if(!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);

                indexer.set_document(doc);
                indexer.index_text(doc_txt);

                db.add_document(doc);

                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }

            db.commit();
        } else if(!strcmp(action, "search")) {
            if(argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            // The query is made of all remaining arguments, separated by spaces
            std::string query_str = argv[3];
            for(int i = 4; i < argc; ++i) {
                query_str += ' ';
                query_str += argv[i];
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                         std::endl;

            enquire.set_query(query);
            Xapian::MSet matches = enquire.get_mset(0, 10);

            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;

            for (Xapian::MSetIterator i = matches.begin();
                    i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                        "% docid=" << *i << " [" <<
                        i.get_document().get_data()<< "]" << std::endl <<
                        std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }

    } catch (const Xapian::Error &error) {
        // Report the failure on the error stream and through the exit status
        std::cerr << "Exception: " << error.get_msg() << std::endl;
        return 1;
    }

    return 0;
}
// Builds a Xapian query from the given query properties.
//
// pIndex             - optional database; when not NULL it is handed to the query parser
// queryProps         - supplies the free query text and the query type
// stemLanguage       - when not empty (and the query isn't minimal) a stemmer for that
//                      language is used with STEM_SOME
// defaultOperator    - DEFAULT_OP_AND selects OP_AND, anything else OP_OR
// limitQuery         - when not empty, the query parsed is "limitQuery AND ( freeQuery )"
// correctedFreeQuery - receives the parser's corrected query string, only if the query isn't
//                      minimal and ENABLE_XAPIAN_SPELLING_CORRECTION>0; left untouched otherwise
// minimal            - when true, stemming, wildcards and spelling correction are not requested;
//                      forced to true for CJKV-only queries
//
// Returns the parsed query. For query types other than XAPIAN_QP, returns what the Xesam
// builder produced, or an empty query if no parser is available or parsing failed.
Xapian::Query XapianEngine::parseQuery(Xapian::Database *pIndex, const QueryProperties &queryProps,
	const string &stemLanguage, DefaultOperator defaultOperator,
	const string &limitQuery, string &correctedFreeQuery, bool minimal)
{
	Xapian::QueryParser parser;
	Xapian::Stem stemmer;
	CJKVTokenizer tokenizer;
	// Newlines are turned into spaces so that the query is on a single line
	string freeQuery(StringManip::replaceSubString(queryProps.getFreeQuery(), "\n", " "));

	if (tokenizer.has_cjkv_only(freeQuery) == true)
	{
		vector<string> tokens;
		string cjkvQuery;

		tokenizer.tokenize(freeQuery, tokens);

		// Get the terms and join them, each followed by a space
		for (vector<string>::const_iterator tokenIter = tokens.begin();
			tokenIter != tokens.end(); ++tokenIter)
		{
			cjkvQuery += *tokenIter;
			cjkvQuery += " ";
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: CJKV query is " << cjkvQuery << endl;
#endif

		// Do as if the user had given this as input
		freeQuery = cjkvQuery;
		// We can disable stemming and spelling correction
		minimal = true;
	}

	if (pIndex != NULL)
	{
		// The database is required for wildcards and spelling
		parser.set_database(*pIndex);
	}

	// Set things up
	if ((minimal == false) &&
		(stemLanguage.empty() == false))
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << stemLanguage << " stemming" << endl;
#endif
		try
		{
			stemmer = Xapian::Stem(StringManip::toLowerCase(stemLanguage));
		}
		catch (const Xapian::Error &error)
		{
			// Carry on with the default-constructed stemmer
			cerr << "Couldn't create stemmer: " << error.get_type() << ": " << error.get_msg() << endl;
		}
		parser.set_stemmer(stemmer);
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
	}
	else
	{
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: no stemming" << endl;
#endif
		parser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
	}
	// What's the default operator ?
	if (defaultOperator == DEFAULT_OP_AND)
	{
		parser.set_default_op(Xapian::Query::OP_AND);
	}
	else
	{
		parser.set_default_op(Xapian::Query::OP_OR);
	}
	// Map the filter names users can type to term prefixes
	// X prefixes should always include a colon
	parser.add_boolean_prefix("site", "H");
	parser.add_boolean_prefix("file", "P");
	parser.add_boolean_prefix("ext", "XEXT:");
	parser.add_prefix("title", "S");
	parser.add_boolean_prefix("url", "U");
	parser.add_boolean_prefix("dir", "XDIR:");
	parser.add_boolean_prefix("lang", "L");
	parser.add_boolean_prefix("type", "T");
	parser.add_boolean_prefix("class", "XCLASS:");
	parser.add_boolean_prefix("label", "XLABEL:");
	parser.add_boolean_prefix("tokens", "XTOK:");

	// Any limit on what documents should be searched ?
	if (limitQuery.empty() == false)
	{
		string limitedQuery(limitQuery);

		// The user's query is AND'ed with the limit, inside parentheses
		limitedQuery += " AND ( ";
		limitedQuery += freeQuery;
		limitedQuery += " )";
		freeQuery = limitedQuery;
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: " << freeQuery << endl;
#endif
	}

	// The range processors below are registered by address, so presumably they have to
	// stay alive until the query has been parsed - keep them in this scope

	// Date range, on value 0
	Xapian::DateValueRangeProcessor dateProcessor(0);
	parser.add_valuerangeprocessor(&dateProcessor);

	// Size with a "b" suffix, ie 1024..10240b, on value 2
#if XAPIAN_NUM_VERSION >= 1001000
	Xapian::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#elif XAPIAN_NUM_VERSION >= 1000002
	// Xapian 1.02 is the bare minimum
	Xapian::v102::NumberValueRangeProcessor sizeProcessor(2, "b", false);
	parser.add_valuerangeprocessor(&sizeProcessor);
#endif

	// Time range, on value 3
	TimeValueRangeProcessor timeProcessor(3);
	parser.add_valuerangeprocessor(&timeProcessor);

	// What type of query is this ?
	QueryProperties::QueryType type = queryProps.getType();
	if (type != QueryProperties::XAPIAN_QP)
	{
		map<string, string> fieldMapping;

		// Bare minimum mapping between Xesam fields and our prefixes
		fieldMapping["dc:title"] = "S";

		XapianQueryBuilder builder(parser, fieldMapping);
		XesamParser *pParser = NULL;

		// Get a Xesam parser
		if (type == QueryProperties::XESAM_QL)
		{
			pParser = new XesamQLParser();
		}
#ifdef HAVE_BOOST_SPIRIT_CORE_HPP
		else if (type == QueryProperties::XESAM_UL)
		{
			pParser = new XesamULParser();
		}
#endif

		// pParser stays NULL if the type isn't supported in this build
		if (pParser != NULL)
		{
			bool parsedQuery = pParser->parse(freeQuery, builder);

			delete pParser;

			if (parsedQuery == true)
			{
				return builder.get_query();
			}
		}

		// No parser, or parsing failed : return an empty query
		return Xapian::Query();
	}

	// Do some pre-processing : look for filters with quoted values, ie name:"value",
	// and replace them with name:value where value was escaped and/or length-limited
	string::size_type escapedFilterEnd = 0;
	string::size_type escapedFilterStart = freeQuery.find(":\"");
	while ((escapedFilterStart != string::npos) &&
		(escapedFilterStart < freeQuery.length() - 2))
	{
		// Find the closing quote
		escapedFilterEnd = freeQuery.find("\"", escapedFilterStart + 2);
		if (escapedFilterEnd == string::npos)
		{
			// Unterminated quote : leave the rest of the query alone
			break;
		}

		string filterValue = freeQuery.substr(escapedFilterStart + 2, escapedFilterEnd - escapedFilterStart - 2);
		if (filterValue.empty() == false)
		{
			string escapedValue(Url::escapeUrl(filterValue));
			bool escapeValue = false, hashValue = false;

			// The value should be escaped and length-limited as done at indexing time
			checkFilter(freeQuery, escapedFilterStart, escapeValue, hashValue);

			if (escapeValue == false)
			{
				// No escaping
				escapedValue = filterValue;
			}
			if (hashValue == true)
			{
				// Partially hash if necessary
				escapedValue = XapianDatabase::limitTermLength(escapedValue, true);
			}
			else
			{
				escapedValue = XapianDatabase::limitTermLength(escapedValue);
			}

			// Replace everything from the opening to the closing quote, both included
			freeQuery.replace(escapedFilterStart + 1, escapedFilterEnd - escapedFilterStart,
				escapedValue);
			escapedFilterEnd = escapedFilterEnd + escapedValue.length() - filterValue.length();
		}
		else
		{
			// No value ! Collapse :"" into a single colon
			freeQuery.replace(escapedFilterStart, escapedFilterEnd - escapedFilterStart + 1, ":");
			escapedFilterEnd -= 2;
		}
#ifdef DEBUG
		cout << "XapianEngine::parseQuery: replaced filter: " << freeQuery << endl;
#endif

		// Next
		escapedFilterStart = freeQuery.find(":\"", escapedFilterEnd);
	}

	// Parse the query string with all necessary options
	unsigned int flags = Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_PHRASE|
		Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE|
		Xapian::QueryParser::FLAG_PURE_NOT;
	if (minimal == false)
	{
		// Wildcards and spelling correction are only requested for non-minimal queries
		flags |= Xapian::QueryParser::FLAG_WILDCARD;
#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		flags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION;
#endif
	}
	Xapian::Query parsedQuery = parser.parse_query(freeQuery, flags);
#ifdef DEBUG
	cout << "XapianEngine::parseQuery: " << parsedQuery.get_description() << endl;
#endif

	if (minimal == false)
	{

#if ENABLE_XAPIAN_SPELLING_CORRECTION>0
		// Any correction ?
		correctedFreeQuery = parser.get_corrected_query_string();
#ifdef DEBUG
		if (correctedFreeQuery.empty() == false)
		{
			cout << "XapianEngine::parseQuery: corrected spelling to: " << correctedFreeQuery << endl;
		}
#endif
#endif
	}

	return parsedQuery;
}