void DataSystem::query(const std::string q) { try { // Start an enquire session. Xapian::Enquire enquire(db); Xapian::Query query = qp.parse_query(q); printf("Parsed query is: %s", query.get_description().c_str()); // Find the top 10 results for the query. enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); // Display the results. printf("%d results found.\n", matches.get_matches_estimated()); printf("Matches 1-$d:\n", matches.size()); for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { printf("%d: %d\% docid= [%s]\n\n", i.get_rank() + 1, i.get_percent(), i.get_document().get_data().c_str()); } } catch (const Xapian::Error &e) { printf("DataSystem, query error: %s", e.get_description()); } }
int main(int argc, char **argv) { // Simplest possible options parsing: we just require two or more // parameters. if (argc < 3) { cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl; exit(1); } // Catch any Xapian::Error exceptions thrown try { // Make the database Xapian::Database db(argv[1]); // Start an enquire session Xapian::Enquire enquire(db); // Set percent and/or weight cutoffs enquire.set_cutoff(90,0.2); // Set weighting schema BM25Weight bm1(1.0,0.0,1.0,0.5,0.3); enquire.set_weighting_scheme(bm1); // Build the query object Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc); cout << "Performing query" << query.get_description() << "'" << endl; // Set Stopper string stop[8]={"的","了","呵","吧","就","你","我","他"}; SimpleStopper *ss=new SimpleStopper; for(int i=0;i<8;i++){ ss->add(stop[i]); } QueryParser qparser; qparser.set_stopper(ss); qparser.set_database(db); // Give the query object to the enquire session enquire.set_query(query); // Get the top 10 results of the query Xapian::MSet matches = enquire.get_mset(0, 10); //最多返回10个文档 // Display the results cout << matches.size() << " results found" << endl; for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) { Xapian::Document doc = i.get_document(); cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl; } db.close(); } catch(const Xapian::Error &error) { cout << "Exception: " << error.get_msg() << endl; } }
bool JobSearchSession::serialize_result(Xapian::MSet &matches, string &resp) { JobSearchResult pb_result; //numbers pb_result.set_offset(m_query.offset()); pb_result.set_total_estimated(m_qualified_matches); TB_INFO("matches count:"<<matches.get_matches_estimated()); TB_INFO("matches qualified:"<<m_qualified_matches); //matches for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { if(i.get_weight() == 0){ break; } string data = i.get_document().get_data(); JobSearchResult pb_; pb_.ParseFromString(data); JobSearchResult_ResultItem &item = *(pb_result.add_result_item()); item.set_jid(pb_.result_item(0).jid()); item.set_title(pb_.result_item(0).title()); item.set_location(pb_.result_item(0).location()); item.set_company_id(pb_.result_item(0).company_id()); item.set_company_name(pb_.result_item(0).company_name()); item.set_company_image(pb_.result_item(0).company_image()); item.set_department(pb_.result_item(0).department()); item.set_post_date(pb_.result_item(0).post_date()); item.set_score(i.get_weight()); if(m_query.pb_query.has_debug() && m_query.pb_query.debug()){ item.set_debug_data(m_mid_data_recorder[item.jid()]); } TB_DEBUG("jid :"<<item.jid()); } pb_result.set_count(pb_result.result_item_size()); TB_INFO("matches in range:"<<pb_result.result_item_size()); //stats std::vector<SearchStats::pair_t> top_v; m_stats.topk(top_v,MAX_STATS_ITEMS); for(unsigned i=0;i<top_v.size();i++){ const SearchStats::term_t &key = top_v[i].first; uint32_t value = top_v[i].second; JobSearchResult_StatsItem *stats = pb_result.add_stats_item(); stats->set_stats_type(JOB_STATS_NAME[m_query.task_type]); stats->set_stats_key(key); stats->set_stats_value(value); } //serialize if(false == pb_result.SerializeToString(&resp)){ TB_INFO("serialize response failed"); return false; } return true; }
bool UserSearchSession::serialize_result(Xapian::MSet &matches, string &resp) { UserSearchResult pb_result; //numbers pb_result.set_offset(m_query.offset()); pb_result.set_total_estimated(m_qualified_matches); TB_DEBUG("matches count:"<<matches.get_matches_estimated()); TB_DEBUG("matches qualified:"<<m_qualified_matches); //matches for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { if(i.get_weight() == 0){ break; } string data = i.get_document().get_data(); UserSearchResult_ResultItem *item = pb_result.add_result_item(); item->set_uid(*(uint64_t*)data.data()); item->set_score(i.get_weight()); item->set_link_distance(1); if(m_query.pb_query.has_debug() && m_query.pb_query.debug()){ item->set_debug_data(m_mid_data_recorder[item->uid()]); } TB_DEBUG("uid :"<<item->uid()); } pb_result.set_count(pb_result.result_item_size()); TB_DEBUG("matches in range:"<<pb_result.result_item_size()); //stats std::vector<SearchStats::pair_t> top_v; m_stats.topk(top_v,MAX_STATS_ITEMS); for(unsigned i=0;i<top_v.size();i++){ const SearchStats::term_t &key = top_v[i].first; uint32_t value = top_v[i].second; UserSearchResult_StatsItem *stats = pb_result.add_stats_item(); stats->set_stats_type(USER_STATS_NAME[m_query.task_type]); stats->set_stats_key(key); stats->set_stats_value(value); } //serialize if(false == pb_result.SerializeToString(&resp)){ TB_DEBUG("serialize response failed"); return false; } return true; }
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query, const string &stemLanguage, unsigned int startDoc, const QueryProperties &queryProps) { Timer timer; unsigned int maxResultsCount = queryProps.getMaximumResultsCount(); bool completedQuery = false; if (pIndex == NULL) { return false; } // Start an enquire session on the database Xapian::Enquire enquire(*pIndex); timer.start(); try { AbstractGenerator abstractGen(pIndex, 50); vector<string> seedTerms; // Give the query object to the enquire session enquire.set_query(query); // How should results be sorted ? if (queryProps.getSortOrder() == QueryProperties::RELEVANCE) { // By relevance, only enquire.set_sort_by_relevance_then_value(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by relevance first" << endl; #endif } else if (queryProps.getSortOrder() == QueryProperties::DATE) { // By date, and then by relevance enquire.set_sort_by_value_then_relevance(4); #ifdef DEBUG cout << "XapianEngine::queryDatabase: sorting by date and time first" << endl; #endif } // Get the top results of the query Xapian::MSet matches = enquire.get_mset(startDoc, maxResultsCount, (2 * maxResultsCount) + 1); m_resultsCountEstimate = matches.get_matches_estimated(); if (matches.empty() == false) { #ifdef DEBUG cout << "XapianEngine::queryDatabase: found " << matches.size() << "/" << maxResultsCount << " results found from position " << startDoc << endl; cout << "XapianEngine::queryDatabase: estimated " << matches.get_matches_lower_bound() << "/" << m_resultsCountEstimate << "/" << matches.get_matches_upper_bound() << endl; #endif // Get the results for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter) { Xapian::docid docId = *mIter; Xapian::Document doc(mIter.get_document()); // What terms did this document match ? 
seedTerms.clear(); for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId); termIter != enquire.get_matching_terms_end(docId); ++termIter) { char firstChar = (*termIter)[0]; if (isupper(((int)firstChar)) == 0) { seedTerms.push_back(*termIter); #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched term " << *termIter << endl; #endif } else if (firstChar == 'Z') { string stemmed((*termIter).substr(1)); string::size_type stemmedLen = stemmed.length(); // Which of this document's terms stem to this ? Xapian::TermIterator docTermIter = pIndex->termlist_begin(docId); if (docTermIter != pIndex->termlist_end(docId)) { for (docTermIter.skip_to(stemmed); docTermIter != pIndex->termlist_end(docId); ++docTermIter) { // Is this a potential unstem ? if (strncasecmp((*docTermIter).c_str(), stemmed.c_str(), stemmedLen) != 0) { // No, no point looking at the next terms break; } #ifdef DEBUG cout << "XapianEngine::queryDatabase: matched unstem " << *docTermIter << endl; #endif // FIXME: check this term stems to stemmed ! seedTerms.push_back(*docTermIter); } } } } DocumentInfo thisResult; thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms)); thisResult.setScore((float)mIter.get_percent()); #ifdef DEBUG cout << "XapianEngine::queryDatabase: found document ID " << docId << endl; #endif XapianDatabase::recordToProps(doc.get_data(), &thisResult); // XapianDatabase stored the language in English thisResult.setLanguage(Languages::toLocale(thisResult.getLanguage())); string url(thisResult.getLocation()); if (url.empty() == true) { // Hmmm this shouldn't be empty... 
// Use this instead, even though the document isn't cached in the index thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId)); } // We don't know the index ID, just the document ID thisResult.setIsIndexed(0, docId); // Add this result m_resultsList.push_back(thisResult); } } completedQuery = true; } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } cout << "Ran query \"" << queryProps.getFreeQuery() << "\" in " << timer.stop() << " ms" << endl; try { m_expandTerms.clear(); // Expand the query ? if (m_expandDocuments.empty() == false) { Xapian::RSet expandDocs; for (set<string>::const_iterator docIter = m_expandDocuments.begin(); docIter != m_expandDocuments.end(); ++docIter) { string uniqueTerm(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(*docIter)), true)); // Only one document may have this term Xapian::PostingIterator postingIter = pIndex->postlist_begin(uniqueTerm); if (postingIter != pIndex->postlist_end(uniqueTerm)) { expandDocs.add_document(*postingIter); } } #ifdef DEBUG cout << "XapianEngine::queryDatabase: expand from " << expandDocs.size() << " documents" << endl; #endif // Get 10 non-prefixed terms string allowedPrefixes("RS"); TermDecider expandDecider(pIndex, ((stemLanguage.empty() == true) ? NULL : &m_stemmer), FileStopper::get_stopper(Languages::toCode(stemLanguage)), allowedPrefixes, query); Xapian::ESet expandTerms = enquire.get_eset(10, expandDocs, &expandDecider); #ifdef DEBUG cout << "XapianEngine::queryDatabase: " << expandTerms.size() << " expand terms" << endl; #endif for (Xapian::ESetIterator termIter = expandTerms.begin(); termIter != expandTerms.end(); ++termIter) { string expandTerm(*termIter); char firstChar = expandTerm[0]; // Is this prefixed ? 
if (allowedPrefixes.find(firstChar) != string::npos) { expandTerm.erase(0, 1); } m_expandTerms.insert(expandTerm); } } } catch (const Xapian::Error &error) { cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl; } // Be tolerant of errors as long as we got some results if ((completedQuery == true) || (m_resultsList.empty() == false)) { return true; } return false; }
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query) { bool completedQuery = false; if (pIndex == NULL) { return false; } // Start an enquire session on the database Xapian::Enquire enquire(*pIndex); try { AbstractGenerator abstractGen(pIndex, 50); vector<string> seedTerms; // Give the query object to the enquire session enquire.set_query(query); // Get the top results of the query Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount); if (matches.empty() == false) { // Get the results #ifdef DEBUG cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl; #endif for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter) { Xapian::docid docId = *mIter; Xapian::Document doc(mIter.get_document()); // What terms did this document match ? seedTerms.clear(); for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId); termIter != enquire.get_matching_terms_end(docId); ++termIter) { seedTerms.push_back(*termIter); } DocumentInfo thisResult; thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms)); thisResult.setScore((float)mIter.get_percent()); #ifdef DEBUG cout << "XapianEngine::queryDatabase: found document ID " << docId << endl; #endif XapianDatabase::recordToProps(doc.get_data(), &thisResult); string url(thisResult.getLocation()); if (url.empty() == true) { // Hmmm this shouldn't be empty... // Use this instead, even though the document isn't cached in the index thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId)); } // We don't know the index ID, just the document ID thisResult.setIsIndexed(0, docId); // Add this result m_resultsList.push_back(thisResult); } } completedQuery = true; } catch (const Xapian::Error &error) { cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl; } try { m_expandTerms.clear(); // Expand the query ? 
if (m_relevantDocuments.empty() == false) { Xapian::RSet relevantDocs; unsigned int count = 0; for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin(); docIter != m_relevantDocuments.end(); ++docIter) { relevantDocs.add_document(*docIter); } // Get 10 non-prefixed terms Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs); for (Xapian::ESetIterator termIter = expandTerms.begin(); (termIter != expandTerms.end()) && (count < 10); ++termIter) { if (isupper((int)((*termIter)[0])) == 0) { m_expandTerms.insert(*termIter); ++count; } } } } catch (const Xapian::Error &error) { cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl; } // Be tolerant of errors as long as we got some results if ((completedQuery == true) || (m_resultsList.empty() == false)) { return true; } return false; }
/**
 * Queries the database named by m_databaseName and appends one Result per
 * match (at most m_maxResultsCount) to m_resultsList.
 *
 * Title, URL, summary and language are pulled out of each document's record
 * through its "caption=", "url=", "sample=" and "language=" fields.
 *
 * @param query  Query to run.
 * @return true when the query ran without a Xapian error; false if the
 *         database or its index could not be obtained, or on error.
 */
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex == NULL)
	{
		// Nothing to search, but the lock still has to be given back
		pDatabase->unlock();
		return false;
	}

	bool succeeded = false;
	try
	{
		// Open an enquire session and hand it the query
		Xapian::Enquire session(*pIndex);
		session.set_query(query);

		// Fetch the best matches
		Xapian::MSet hits = session.get_mset(0, m_maxResultsCount);
#ifdef DEBUG
		cout << "XapianEngine::queryDatabase: " << hits.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif

		Xapian::MSetIterator hit = hits.begin();
		while (hit != hits.end())
		{
			// The record holds newline-terminated "name=value" fields
			string record = hit.get_document().get_data();

			string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif

			string url = StringManip::extractField(record, "url=", "\n");
			if (url.empty() == false)
			{
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
				url = Url::canonicalizeUrl(url);
			}
			else
			{
				// Not expected to happen; fall back on a URL built from the
				// document ID, even though the document isn't cached in the index
				url = buildUrl(m_databaseName, *hit);
			}

			string summary = StringManip::extractField(record, "sample=", "\n");
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: found omindex summary " << summary << endl;
#endif
			string type = StringManip::extractField(record, "type=", "\n");
			// The language is optional
			string language = StringManip::extractField(record, "language=", "\n");

			// Record this match
			Result match(url, title, summary, language, (float)hit.get_percent());
			m_resultsList.push_back(match);

			++hit;
		}

		succeeded = true;
	}
	catch (const Xapian::Error &error)
	{
		cout << "XapianEngine::queryDatabase: couldn't run query: " << error.get_msg() << endl;
	}

	pDatabase->unlock();
	return succeeded;
}
void QueryHandler(const QueryMessage &message, const Theron::Address from) { search::QueryInfo qi=*(message.query); std::string resKey(message.resKey); delete message.query; std::string segString; char *output=new char[qi.query.length()*9]; char *input=new char[qi.query.length()*3]; memset(output,0,qi.query.length()*9); memset(input,0,qi.query.length()*3); try { UErrorCode error = U_ZERO_ERROR; ucnv_convert("GBK","UTF-8",input, qi.query.length()*3, qi.query.c_str(), qi.query.length(), &error ); bool ret = result->ParagraphProcessing(input, output); if (ret) { int oLen=strlen(output); char *utf8out=new char[oLen*3]; memset(utf8out,0,oLen*3); ucnv_convert("UTF-8","GBK",utf8out, oLen*3, output, oLen, &error ); segString=std::string(utf8out); delete [] utf8out; } } catch (...) { } delete [] output; delete [] input; std::list<std::string> segList; if(segString.length()>0) { std::vector<std::string> resv; boost::algorithm::split( resv, segString, boost::algorithm::is_any_of(" ") ); for(std::vector<std::string>::iterator it=resv.begin();it!=resv.end();++it) { std::vector<std::string> tmpv; boost::algorithm::split( tmpv, *it, boost::algorithm::is_any_of("/") ); if(tmpv.size()>1&&tmpv[1]!="w") segList.push_back(std::string("K")+tmpv[0]); } } search::DocList *dList=new search::DocList(); if(segList.size()>0) { Xapian::Query query(Xapian::Query::OP_AND,segList.begin(), segList.end()); while(1) { try { db.reopen(); Xapian::Enquire enquire(db); enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 100); for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { Xapian::Document doc = i.get_document(); search::IndexInfo info; info.uid=doc.get_value(1); info.attMap.insert(std::make_pair(std::string("title"),doc.get_value(2))); info.content=doc.get_data(); dList->docList.push_back(info); } std::cout<<"doc size:"<<dList->docList.size()<<std::endl; break; }catch(Xapian::DatabaseModifiedError exception) { std::cout<<"try agian"<<std::endl; }catch(...) 
{ break; } } } Send(QueryResponceMessage(dList,resKey.c_str()), from); }
/**
 * Queries the database named by m_databaseName, appends one Result per match
 * (at most m_maxResultsCount) to m_resultsList and, when m_relevantDocuments
 * is not empty, refills m_expandTerms with up to 10 expansion terms that
 * don't start with an upper-case character.
 *
 * The database is reopened first to pick up the latest revision. When a
 * record has no "sample=" field, a summary is generated from the query's
 * terms, ordered by decreasing term weight.
 *
 * @param query  Query to run.
 * @return true when the query and expansion ran without a Xapian error;
 *         false if the database or its index could not be obtained, or on
 *         error. Errors are logged to cerr.
 */
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
			if (matches.empty() == false)
			{
				multimap<Xapian::weight, string> queryTerms;
				vector<string> seedTerms;
				Xapian::weight maxWeight = matches.get_max_attained();

				// Sort query terms by weight : the key is the distance to the maximum
				// weight, so that the heaviest terms come first
				for (Xapian::TermIterator termIter = query.get_terms_begin();
					termIter != query.get_terms_end(); ++termIter)
				{
					string termName(*termIter);
					Xapian::weight termWeight = maxWeight - matches.get_termweight(termName);

					queryTerms.insert(pair<Xapian::weight, string>(termWeight, termName));
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: term " << termName << " has weight " << matches.get_termweight(termName) << "/" << maxWeight << endl;
#endif
				}

				for (multimap<Xapian::weight, string>::iterator weightIter = queryTerms.begin();
					weightIter != queryTerms.end(); ++weightIter)
				{
					seedTerms.push_back(weightIter->second);
				}

				// Get the results
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif
				for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
				{
					Xapian::docid docId = *mIter;
					Xapian::Document doc(mIter.get_document());
					string record = doc.get_data();

					// Get the title
					string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
					// Get the URL
					string url = StringManip::extractField(record, "url=", "\n");
					if (url.empty() == true)
					{
						// Hmmm this shouldn't be empty...
						// Use this instead, even though the document isn't cached in the index
						url = XapianDatabase::buildUrl(m_databaseName, *mIter);
					}
					else
					{
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
						url = Url::canonicalizeUrl(url);
					}

					// Get the type
					string type = StringManip::extractField(record, "type=", "\n");
					// ...and the language, if available
					string language = StringManip::extractField(record, "language=", "\n");
					// Finally, get a summary
					string summary = StringManip::extractField(record, "sample=", "\n");
					if (summary.empty() == true)
					{
						AbstractGenerator abstractGen(pIndex, 50);

						// Generate an abstract based on the query's terms
						summary = abstractGen.generateAbstract(seedTerms, docId);
					}

					// Add this result
					Result thisResult(url, title, summary, language, (float)mIter.get_percent());
					m_resultsList.push_back(thisResult);
				}
			}

			m_expandTerms.clear();

			// Expand the query ?
			if (m_relevantDocuments.empty() == false)
			{
				Xapian::RSet relevantDocs;
				unsigned int count = 0;

				for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
					docIter != m_relevantDocuments.end(); ++docIter)
				{
					relevantDocs.add_document(*docIter);
				}

				// Get 10 non-prefixed terms : ask for 20 candidates since prefixed ones are dropped
				Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
				for (Xapian::ESetIterator termIter = expandTerms.begin();
					(termIter != expandTerms.end()) && (count < 10); ++termIter)
				{
					const string term(*termIter);

					// Terms may hold UTF-8 bytes : isupper() is only defined for
					// values representable as unsigned char
					if (isupper(static_cast<unsigned char>(term[0])) == 0)
					{
						m_expandTerms.insert(term);
						++count;
					}
				}
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
		}
	}
	pDatabase->unlock();

	return bStatus;
}
/** Main routine */ int main(int argc,char **argv) { // process inputs that were passed to us via QUERY_STRING std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n"; std::string callback; try { // get input parameters const char *queryEnv = getenv("QUERY_STRING"); std::string queryString; if (queryEnv) { queryString = queryEnv; } else if (argc>=2) { queryString = argv[1]; } else { std::cout << "No input!\n"; exit(1); } // parse query string std::vector<std::string> parts = split(queryString,'&'); std::string searchFor,callback; int num=1,page=0; for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it) { std::vector<std::string> kv = split(*it,'='); if (kv.size()==2) { std::string val = uriDecode(kv[1]); if (kv[0]=="q") searchFor = val; else if (kv[0]=="n") num = fromString<int>(val); else if (kv[0]=="p") page = fromString<int>(val); else if (kv[0]=="cb") callback = val; } } std::string indexDir = "doxysearch.db"; if (queryString=="test") // user test { bool dbOk = dirExists(indexDir); if (dbOk) { std::cout << "Test successful."; } else { std::cout << "Test failed: cannot find search index " << indexDir; } exit(0); } // create query Xapian::Database db(indexDir); Xapian::Enquire enquire(db); Xapian::Query query; std::vector<std::string> words = split(searchFor,' '); for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it) { query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it)); } enquire.set_query(query); // get results Xapian::MSet matches = enquire.get_mset(page*num,num); unsigned int hits = matches.get_matches_estimated(); unsigned int offset = page*num; unsigned int pages = num>0 ? 
(hits+num-1)/num : 0; if (offset>hits) offset=hits; if (offset+num>hits) num=hits-offset; // write results as JSONP std::cout << callback.c_str() << "("; std::cout << "{" << std::endl << " \"hits\":" << hits << "," << std::endl << " \"first\":" << offset << "," << std::endl << " \"count\":" << num << "," << std::endl << " \"page\":" << page << "," << std::endl << " \"pages\":" << pages << "," << std::endl << " \"query\": \"" << escapeString(searchFor) << "\"," << std::endl << " \"items\":[" << std::endl; // foreach search result unsigned int o = offset; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o) { std::vector<Fragment> hl; Xapian::Document doc = i.get_document(); highlighter(doc.get_value(FIELD_DOC),words,hl); std::cout << " {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl << " \"name\": \"" << doc.get_value(FIELD_NAME) << doc.get_value(FIELD_ARGS) << "\"," << std::endl << " \"tag\": \"" << doc.get_value(FIELD_TAG) << "\"," << std::endl << " \"url\": \"" << doc.get_value(FIELD_URL) << "\"," << std::endl; std::cout << " \"fragments\":[" << std::endl; int c=0; bool first=true; for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c) { if (!first) std::cout << "," << std::endl; std::cout << " \"" << escapeString((*it).text) << "\""; first=false; } if (!first) std::cout << std::endl; std::cout << " ]" << std::endl; std::cout << " }"; if (o<offset+num-1) std::cout << ","; std::cout << std::endl; } std::cout << " ]" << std::endl << "})" << std::endl; } catch (const Xapian::Error &e) // Xapian exception { showError(callback,e.get_description()); } catch (...) // Any other exception { showError(callback,"Unknown Exception!"); exit(1); } return 0; }
int main(int argc, char **argv) { if(argc < 2) { usage(argv); return 1; } try { char *action = argv[1]; char *db_path = argv[2]; if(!strcmp(action, "index")) { Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN); Xapian::TermGenerator indexer; Xapian::Stem stemmer("english"); indexer.set_stemmer(stemmer); std::string doc_txt; while(true) { if(std::cin.eof()) break; std::string line; getline(std::cin, line); doc_txt += line; } if(!doc_txt.empty()) { Xapian::Document doc; doc.set_data(doc_txt); indexer.set_document(doc); indexer.index_text(doc_txt); db.add_document(doc); std::cout << "Indexed: " << indexer.get_description() << std::endl; } db.commit(); } else if(!strcmp(action, "search")) { if(argc < 4) { std::cerr << "You must supply a query string" << std::endl; return 1; } Xapian::Database db(db_path); Xapian::Enquire enquire(db); std::string query_str = argv[3]; argv+= 4; while(*argv) { query_str += ' '; query_str += *argv++; } Xapian::QueryParser qp; Xapian::Stem stemmer("english"); qp.set_stemmer(stemmer); qp.set_database(db); qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); Xapian::Query query = qp.parse_query(query_str); std::cout << "Parsed query is: " << query.get_description() << std::endl; enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); std::cout << matches.get_matches_estimated() << " results found.\n"; std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" << *i << " [" << i.get_document().get_data()<< "]" << std::endl << std::endl; } } else { std::cerr << "Invalid action " << action << std::endl; usage(argv); return 1; } } catch (const Xapian::Error &error) { std::cout << "Exception: " << error.get_msg() << std::endl; } }