/// Validates a query and extracts its terms. bool XapianEngine::validateQuery(QueryProperties& queryProps, bool includePrefixed, vector<string> &terms) { bool goodQuery = false; try { Xapian::Query fullQuery = parseQuery(NULL, queryProps, "", true); if (fullQuery.empty() == false) { for (Xapian::TermIterator termIter = fullQuery.get_terms_begin(); termIter != fullQuery.get_terms_end(); ++termIter) { // Skip prefixed terms unless instructed otherwise if ((includePrefixed == true) || (isupper((int)((*termIter)[0])) == 0)) { terms.push_back(*termIter); } } goodQuery = true; } } catch (const Xapian::Error &error) { cerr << "XapianEngine::validateQuery: " << error.get_type() << ": " << error.get_msg() << endl; } return goodQuery; }
/// Builds an expand decider and the set of terms it should avoid.
///
/// The avoid-set is seeded from the terms of @p query:
///  - a term that doesn't start with an upper-case letter is added as is,
///    together with its stem if a stemmer was supplied;
///  - a term that starts with 'Z' is added with that first character
///    dropped (NOTE(review): presumably 'Z' is Xapian's stemmed-term
///    prefix - confirm);
///  - any other term starting with an upper-case letter is ignored.
///
/// @param pIndex          index pointer, stored as given.
/// @param pStemmer        stemmer used to also avoid stems; may be NULL.
/// @param pStopper        stopper pointer, stored as given.
/// @param allowedPrefixes copied into m_allowedPrefixes.
/// @param query           query whose terms seed the avoid-set.
TermDecider(Xapian::Database *pIndex, Xapian::Stem *pStemmer, Xapian::Stopper *pStopper,
	const string &allowedPrefixes, Xapian::Query &query) :
	Xapian::ExpandDecider(),
	m_pIndex(pIndex),
	m_pStemmer(pStemmer),
	m_pStopper(pStopper),
	m_allowedPrefixes(allowedPrefixes),
	m_pTermsToAvoid(new set<string>())
{
	for (Xapian::TermIterator termIter = query.get_terms_begin();
		termIter != query.get_terms_end(); ++termIter)
	{
		string term(*termIter);

		// Convert to unsigned char before calling isupper(): passing a
		// negative value (a non-ASCII byte where char is signed) is
		// undefined behaviour.
		if (isupper(static_cast<unsigned char>(term[0])) == 0)
		{
			// Unprefixed term: avoid it and, if possible, its stem
			m_pTermsToAvoid->insert(term);
			if (m_pStemmer != NULL)
			{
				string stem((*m_pStemmer)(term));
				m_pTermsToAvoid->insert(stem);
			}
		}
		else if (term[0] == 'Z')
		{
			// Avoid the term without its leading 'Z'
			m_pTermsToAvoid->insert(term.substr(1));
		}
	}
#ifdef DEBUG
	cout << "TermDecider: avoiding " << m_pTermsToAvoid->size() << " terms" << endl;
#endif
}
/// Runs a parsed query against the engine's database.
///
/// Each match is appended to m_resultsList. The title, URL, language and
/// summary are read from the "caption=", "url=", "language=" and "sample="
/// fields of the document data. A missing URL is rebuilt from the database
/// name and document ID, and a missing summary is generated from the query's
/// terms. m_expandTerms is then reset and, if m_relevantDocuments is not
/// empty, filled with up to 10 non-prefixed expansion terms.
///
/// @param query the Xapian query to run.
/// @return true if the query ran without Xapian raising an error (even if
///         nothing matched); false if the database or its index couldn't be
///         obtained, or if Xapian reported an error (logged to stderr).
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
			if (matches.empty() == false)
			{
				multimap<Xapian::weight, string> queryTerms;
				vector<string> seedTerms;
				Xapian::weight maxWeight = matches.get_max_attained();

				// Sort query terms by weight: the key is the distance to the
				// maximum attained weight, so the multimap's ascending order
				// lists the heaviest terms first
				for (Xapian::TermIterator termIter = query.get_terms_begin();
					termIter != query.get_terms_end(); ++termIter)
				{
					string termName(*termIter);
					Xapian::weight termWeight = maxWeight - matches.get_termweight(termName);

					queryTerms.insert(pair<Xapian::weight, string>(termWeight, termName));
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: term " << termName << " has weight " << matches.get_termweight(termName) << "/" << maxWeight << endl;
#endif
				}

				for (multimap<Xapian::weight, string>::iterator weightIter = queryTerms.begin();
					weightIter != queryTerms.end(); ++weightIter)
				{
					seedTerms.push_back(weightIter->second);
				}

				// Get the results
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif
				for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
				{
					Xapian::docid docId = *mIter;
					Xapian::Document doc(mIter.get_document());
					string record = doc.get_data();

					// Get the title
					string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
					// Get the URL
					string url = StringManip::extractField(record, "url=", "\n");
					if (url.empty() == true)
					{
						// Hmmm this shouldn't be empty...
						// Use this instead, even though the document isn't cached in the index
						url = XapianDatabase::buildUrl(m_databaseName, *mIter);
					}
					else
					{
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
						url = Url::canonicalizeUrl(url);
					}

					// Get the type (extracted but not used by the result below)
					string type = StringManip::extractField(record, "type=", "\n");
					// ...and the language, if available
					string language = StringManip::extractField(record, "language=", "\n");
					// Finally, get a summary
					string summary = StringManip::extractField(record, "sample=", "\n");
					if (summary.empty() == true)
					{
						AbstractGenerator abstractGen(pIndex, 50);

						// Generate an abstract based on the query's terms
						summary = abstractGen.generateAbstract(seedTerms, docId);
					}

					// Add this result
					Result thisResult(url, title, summary, language, static_cast<float>(mIter.get_percent()));
					m_resultsList.push_back(thisResult);
				}
			}

			m_expandTerms.clear();

			// Expand the query ?
			if (m_relevantDocuments.empty() == false)
			{
				Xapian::RSet relevantDocs;
				unsigned int count = 0;

				for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
					docIter != m_relevantDocuments.end(); ++docIter)
				{
					relevantDocs.add_document(*docIter);
				}

				// Get 10 non-prefixed terms out of at most 20 candidates
				Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
				for (Xapian::ESetIterator termIter = expandTerms.begin();
					(termIter != expandTerms.end()) && (count < 10); ++termIter)
				{
					const string expandTerm(*termIter);

					// Convert to unsigned char before calling isupper():
					// passing a negative value (a non-ASCII byte where char
					// is signed) is undefined behaviour
					if (isupper(static_cast<unsigned char>(expandTerm[0])) == 0)
					{
						m_expandTerms.insert(expandTerm);
						++count;
					}
				}
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
		}
	}
	// Release the lock whether or not the index could be obtained
	pDatabase->unlock();

	return bStatus;
}
/// Runs a query; true if success. bool XapianEngine::runQuery(QueryProperties& queryProps) { // Clear the results list m_resultsList.clear(); XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true); if (pDatabase == NULL) { return false; } // Get the latest revision... pDatabase->reopen(); Xapian::Database *pIndex = pDatabase->readLock(); try { string stemLanguage; unsigned int searchStep = 1; bool followOperators = true; // Searches are run in this order : // 1. follow operators and don't stem terms // 2. if no results, follow operators and stem terms // 3. if no results, don't follow operators and don't stem terms // 4. if no results, don't follow operators and stem terms // Steps 2 and 4 depend on a language being defined for the query Xapian::Query freeQuery = parseQuery(pIndex, queryProps, "", followOperators); while (freeQuery.empty() == false) { #ifdef DEBUG cout << "XapianEngine::runQuery: query terms are " << endl; for (Xapian::TermIterator termIter = freeQuery.get_terms_begin(); termIter != freeQuery.get_terms_end(); ++termIter) { cout << " " << *termIter << endl; } #endif // Query the database if (queryDatabase(pIndex, freeQuery) == false) { break; } if (m_resultsList.empty() == true) { // The search did succeed but didn't return anything // Try the next step switch (++searchStep) { case 2: followOperators = true; stemLanguage = queryProps.getLanguage(); if (stemLanguage.empty() == false) { break; } ++searchStep; case 3: followOperators = false; stemLanguage.clear(); break; case 4: followOperators = false; stemLanguage = queryProps.getLanguage(); if (stemLanguage.empty() == false) { break; } ++searchStep; default: pDatabase->unlock(); return true; } #ifdef DEBUG cout << "XapianEngine::runQuery: trying step " << searchStep << endl; #endif freeQuery = parseQuery(pIndex, queryProps, Languages::toEnglish(stemLanguage), followOperators); continue; } pDatabase->unlock(); return true; } } catch (const Xapian::Error &error) { cerr 
<< "XapianEngine::runQuery: " << error.get_type() << ": " << error.get_msg() << endl; } pDatabase->unlock(); return false; }