void DataSystem::query(const std::string q) { try { // Start an enquire session. Xapian::Enquire enquire(db); Xapian::Query query = qp.parse_query(q); printf("Parsed query is: %s", query.get_description().c_str()); // Find the top 10 results for the query. enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); // Display the results. printf("%d results found.\n", matches.get_matches_estimated()); printf("Matches 1-$d:\n", matches.size()); for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { printf("%d: %d\% docid= [%s]\n\n", i.get_rank() + 1, i.get_percent(), i.get_document().get_data().c_str()); } } catch (const Xapian::Error &e) { printf("DataSystem, query error: %s", e.get_description()); } }
int main(int argc, char **argv) { // Simplest possible options parsing: we just require two or more // parameters. if (argc < 3) { cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl; exit(1); } // Catch any Xapian::Error exceptions thrown try { // Make the database Xapian::Database db(argv[1]); // Start an enquire session Xapian::Enquire enquire(db); // Set percent and/or weight cutoffs enquire.set_cutoff(90,0.2); // Set weighting schema BM25Weight bm1(1.0,0.0,1.0,0.5,0.3); enquire.set_weighting_scheme(bm1); // Build the query object Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc); cout << "Performing query" << query.get_description() << "'" << endl; // Set Stopper string stop[8]={"的","了","呵","吧","就","你","我","他"}; SimpleStopper *ss=new SimpleStopper; for(int i=0;i<8;i++){ ss->add(stop[i]); } QueryParser qparser; qparser.set_stopper(ss); qparser.set_database(db); // Give the query object to the enquire session enquire.set_query(query); // Get the top 10 results of the query Xapian::MSet matches = enquire.get_mset(0, 10); //最多返回10个文档 // Display the results cout << matches.size() << " results found" << endl; for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) { Xapian::Document doc = i.get_document(); cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl; } db.close(); } catch(const Xapian::Error &error) { cout << "Exception: " << error.get_msg() << endl; } }
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query, const string &stemLanguage, unsigned int startDoc, const QueryProperties &queryProps) { Timer timer; unsigned int maxResultsCount = queryProps.getMaximumResultsCount(); bool completedQuery = false; if (pIndex == NULL) { return false; } // Start an enquire session on the database Xapian::Enquire enquire(*pIndex); timer.start(); try { AbstractGenerator abstractGen(pIndex, 50); vector<string> seedTerms; // Give the query object to the enquire session enquire.set_query(query); // How should results be sorted ? if (queryProps.getSortOrder() == QueryProperties::RELEVANCE) { // By relevance, only enquire.set_sort_by_relevance_then_value(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by relevance first" << endl; #endif } else if (queryProps.getSortOrder() == QueryProperties::DATE) { // By date, and then by relevance enquire.set_sort_by_value_then_relevance(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by date and time first" << endl; #endif } // Get the top results of the query Xapian::MSet matches = enquire.get_mset(startDoc, maxResultsCount, (2 * maxResultsCount) + 1); m_resultsCountEstimate = matches.get_matches_estimated(); if (matches.empty() == false) { #ifdef DEBUG cout << "XapianEngine::queryDatabase: found " << matches.size() << "/" << maxResultsCount << " results found from position " << startDoc << endl; cout << "XapianEngine::queryDatabase: estimated " << matches.get_matches_lower_bound() << "/" << m_resultsCountEstimate << "/" << matches.get_matches_upper_bound() << endl; #endif // Get the results for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter) { Xapian::docid docId = *mIter; Xapian::Document doc(mIter.get_document()); // What terms did this document match ? seedTerms.clear(); for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId); termIter != enquire.get_matching_terms_end(docId); ++termIter) { char firstChar = (*termIter)[0]; if (isupper(((int)firstChar)) == 0) { seedTerms.push_back(*termIter); #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched term " << *termIter << endl; #endif } else if (firstChar == 'Z') { string stemmed((*termIter).substr(1)); string::size_type stemmedLen = stemmed.length(); // Which of this document's terms stem to this ? Xapian::TermIterator docTermIter = pIndex->termlist_begin(docId); if (docTermIter != pIndex->termlist_end(docId)) { for (docTermIter.skip_to(stemmed); docTermIter != pIndex->termlist_end(docId); ++docTermIter) { // Is this a potential unstem ? if (strncasecmp((*docTermIter).c_str(), stemmed.c_str(), stemmedLen) != 0) { // No, no point looking at the next terms break; } #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched unstem " << *docTermIter << endl; #endif // FIXME: check this term stems to stemmed ! seedTerms.push_back(*docTermIter); } } } } DocumentInfo thisResult; thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms)); thisResult.setScore((float)mIter.get_percent()); #ifdef DEBUG cout << "XapianEngine::queryDatabase: found document ID " << docId << endl; #endif XapianDatabase::recordToProps(doc.get_data(), &thisResult); // XapianDatabase stored the language in English thisResult.setLanguage(Languages::toLocale(thisResult.getLanguage())); string url(thisResult.getLocation()); if (url.empty() == true) { // Hmmm this shouldn't be empty... // Use this instead, even though the document isn't cached in the index thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId)); } // We don't know the index ID, just the document ID thisResult.setIsIndexed(0, docId); // Add this result m_resultsList.push_back(thisResult); } } completedQuery = true; } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } cout << "Ran query \"" << queryProps.getFreeQuery() << "\" in " << timer.stop() << " ms" << endl; try { m_expandTerms.clear(); // Expand the query ? if (m_expandDocuments.empty() == false) { Xapian::RSet expandDocs; for (set<string>::const_iterator docIter = m_expandDocuments.begin(); docIter != m_expandDocuments.end(); ++docIter) { string uniqueTerm(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(*docIter)), true)); // Only one document may have this term Xapian::PostingIterator postingIter = pIndex->postlist_begin(uniqueTerm); if (postingIter != pIndex->postlist_end(uniqueTerm)) { expandDocs.add_document(*postingIter); } } #ifdef DEBUG cout << "XapianEngine::queryDatabase: expand from " << expandDocs.size() << " documents" << endl; #endif // Get 10 non-prefixed terms string allowedPrefixes("RS"); TermDecider expandDecider(pIndex, ((stemLanguage.empty() == true) ? NULL : &m_stemmer), FileStopper::get_stopper(Languages::toCode(stemLanguage)), allowedPrefixes, query); Xapian::ESet expandTerms = enquire.get_eset(10, expandDocs, &expandDecider); #ifdef DEBUG cout << "XapianEngine::queryDatabase: " << expandTerms.size() << " expand terms" << endl; #endif for (Xapian::ESetIterator termIter = expandTerms.begin(); termIter != expandTerms.end(); ++termIter) { string expandTerm(*termIter); char firstChar = expandTerm[0]; // Is this prefixed ? if (allowedPrefixes.find(firstChar) != string::npos) { expandTerm.erase(0, 1); } m_expandTerms.insert(expandTerm); } } } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } // Be tolerant of errors as long as we got some results if ((completedQuery == true) || (m_resultsList.empty() == false)) { return true; } return false; }
int main(int argc, char **argv) { if(argc < 2) { usage(argv); return 1; } try { char *action = argv[1]; char *db_path = argv[2]; if(!strcmp(action, "index")) { Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN); Xapian::TermGenerator indexer; Xapian::Stem stemmer("english"); indexer.set_stemmer(stemmer); std::string doc_txt; while(true) { if(std::cin.eof()) break; std::string line; getline(std::cin, line); doc_txt += line; } if(!doc_txt.empty()) { Xapian::Document doc; doc.set_data(doc_txt); indexer.set_document(doc); indexer.index_text(doc_txt); db.add_document(doc); std::cout << "Indexed: " << indexer.get_description() << std::endl; } db.commit(); } else if(!strcmp(action, "search")) { if(argc < 4) { std::cerr << "You must supply a query string" << std::endl; return 1; } Xapian::Database db(db_path); Xapian::Enquire enquire(db); std::string query_str = argv[3]; argv+= 4; while(*argv) { query_str += ' '; query_str += *argv++; } Xapian::QueryParser qp; Xapian::Stem stemmer("english"); qp.set_stemmer(stemmer); qp.set_database(db); qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); Xapian::Query query = qp.parse_query(query_str); std::cout << "Parsed query is: " << query.get_description() << std::endl; enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); std::cout << matches.get_matches_estimated() << " results found.\n"; std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" << *i << " [" << i.get_document().get_data()<< "]" << std::endl << std::endl; } } else { std::cerr << "Invalid action " << action << std::endl; usage(argv); return 1; } } catch (const Xapian::Error &error) { std::cout << "Exception: " << error.get_msg() << std::endl; } }