Ejemplo n.º 1
0
void DataSystem::query(const std::string q)
{
	try {
		// Start an enquire session.
		Xapian::Enquire enquire(db);

		Xapian::Query query = qp.parse_query(q);
		printf("Parsed query is: %s", query.get_description().c_str());

		// Find the top 10 results for the query.
		enquire.set_query(query);
		Xapian::MSet matches = enquire.get_mset(0, 10);

		// Display the results.
		printf("%d results found.\n", matches.get_matches_estimated());
		printf("Matches 1-$d:\n", matches.size());

		for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
	{
			printf("%d: %d\% docid= [%s]\n\n",
				   i.get_rank() + 1,
				   i.get_percent(),
				   i.get_document().get_data().c_str());
		}
	} catch (const Xapian::Error &e)
	{
		printf("DataSystem, query error: %s", e.get_description());
	}
}
Ejemplo n.º 2
0
int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require two or more
    // parameters.
    if (argc < 3) {
        cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
        exit(1);
    }
 
    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
	Xapian::Database db(argv[1]);
 
        // Start an enquire session
	Xapian::Enquire enquire(db);
         
        // Set percent and/or weight cutoffs
        enquire.set_cutoff(90,0.2);
         
        // Set weighting schema
        BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
        enquire.set_weighting_scheme(bm1);
 
        // Build the query object
	Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
        cout << "Performing query" << query.get_description() << "'" << endl;
	
        // Set Stopper
        string stop[8]={"的","了","呵","吧","就","你","我","他"};
        SimpleStopper *ss=new SimpleStopper;
        for(int i=0;i<8;i++){
            ss->add(stop[i]);
        }
        QueryParser qparser;
        qparser.set_stopper(ss);
        qparser.set_database(db);
 
        // Give the query object to the enquire session
        enquire.set_query(query);
 
        // Get the top 10 results of the query
	Xapian::MSet matches = enquire.get_mset(0, 10);                     //最多返回10个文档
 
        // Display the results
        cout << matches.size() << " results found" << endl;
 
        for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
	    Xapian::Document doc = i.get_document();
            cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
        }
        db.close();
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
    }
}
Ejemplo n.º 3
0
bool 
JobSearchSession::serialize_result(Xapian::MSet &matches, string &resp)
{
    JobSearchResult pb_result;
    //numbers
    pb_result.set_offset(m_query.offset());
    pb_result.set_total_estimated(m_qualified_matches);
    TB_INFO("matches count:"<<matches.get_matches_estimated());
    TB_INFO("matches qualified:"<<m_qualified_matches);
    //matches
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
        if(i.get_weight() == 0){
            break;
        }
        string data = i.get_document().get_data();
        JobSearchResult pb_;
        pb_.ParseFromString(data);
        
        JobSearchResult_ResultItem &item = *(pb_result.add_result_item());
        item.set_jid(pb_.result_item(0).jid());
        item.set_title(pb_.result_item(0).title());
        item.set_location(pb_.result_item(0).location());
        item.set_company_id(pb_.result_item(0).company_id());
        item.set_company_name(pb_.result_item(0).company_name());
        item.set_company_image(pb_.result_item(0).company_image());
        item.set_department(pb_.result_item(0).department());
        item.set_post_date(pb_.result_item(0).post_date());
        item.set_score(i.get_weight()); 
        if(m_query.pb_query.has_debug() && m_query.pb_query.debug()){
            item.set_debug_data(m_mid_data_recorder[item.jid()]);
        }
        TB_DEBUG("jid :"<<item.jid());
    }
    pb_result.set_count(pb_result.result_item_size());
    TB_INFO("matches in range:"<<pb_result.result_item_size());

    //stats
    std::vector<SearchStats::pair_t> top_v;
    m_stats.topk(top_v,MAX_STATS_ITEMS);
    for(unsigned i=0;i<top_v.size();i++){
        const SearchStats::term_t &key = top_v[i].first;
        uint32_t value = top_v[i].second;
        JobSearchResult_StatsItem *stats = pb_result.add_stats_item();
        stats->set_stats_type(JOB_STATS_NAME[m_query.task_type]);
        stats->set_stats_key(key);
        stats->set_stats_value(value);
    }
    
    //serialize
    if(false == pb_result.SerializeToString(&resp)){
        TB_INFO("serialize response failed");
        return false;
    }
    return true;
}
Ejemplo n.º 4
0
bool 
UserSearchSession::serialize_result(Xapian::MSet &matches, string &resp)
{
    UserSearchResult pb_result;
    //numbers
    pb_result.set_offset(m_query.offset());
    pb_result.set_total_estimated(m_qualified_matches);
    TB_DEBUG("matches count:"<<matches.get_matches_estimated());
    TB_DEBUG("matches qualified:"<<m_qualified_matches);
    //matches
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
        if(i.get_weight() == 0){
            break;
        }
        string data = i.get_document().get_data();
        UserSearchResult_ResultItem *item = pb_result.add_result_item();
        item->set_uid(*(uint64_t*)data.data()); 
        item->set_score(i.get_weight()); 
        item->set_link_distance(1); 
        if(m_query.pb_query.has_debug() && m_query.pb_query.debug()){
            item->set_debug_data(m_mid_data_recorder[item->uid()]);
        }
        TB_DEBUG("uid :"<<item->uid());
    }
    pb_result.set_count(pb_result.result_item_size());
    TB_DEBUG("matches in range:"<<pb_result.result_item_size());

    //stats
    std::vector<SearchStats::pair_t> top_v;
    m_stats.topk(top_v,MAX_STATS_ITEMS);
    for(unsigned i=0;i<top_v.size();i++){
        const SearchStats::term_t &key = top_v[i].first;
        uint32_t value = top_v[i].second;
        UserSearchResult_StatsItem *stats = pb_result.add_stats_item();
        stats->set_stats_type(USER_STATS_NAME[m_query.task_type]);
        stats->set_stats_key(key);
        stats->set_stats_value(value);
    }
    //serialize
    if(false == pb_result.SerializeToString(&resp)){
        TB_DEBUG("serialize response failed");
        return false;
    }
    return true;
}
Ejemplo n.º 5
0
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query,
	const string &stemLanguage, unsigned int startDoc, const QueryProperties &queryProps)
{
	Timer timer;
	unsigned int maxResultsCount = queryProps.getMaximumResultsCount();
	bool completedQuery = false;

	if (pIndex == NULL)
	{
		return false;
	}

	// Start an enquire session on the database
	Xapian::Enquire enquire(*pIndex);

	timer.start();
	try
	{
		AbstractGenerator abstractGen(pIndex, 50);
		vector<string> seedTerms;

		// Give the query object to the enquire session
		enquire.set_query(query);
		// How should results be sorted ?
		if (queryProps.getSortOrder() == QueryProperties::RELEVANCE)
		{
			// By relevance, only
			enquire.set_sort_by_relevance_then_value(4);
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: sorting by relevance first" << endl;
#endif
		}
		else if (queryProps.getSortOrder() == QueryProperties::DATE)
		{
			// By date, and then by relevance
			enquire.set_sort_by_value_then_relevance(4);
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: sorting by date and time first" << endl;
#endif
		}

		// Get the top results of the query
		Xapian::MSet matches = enquire.get_mset(startDoc, maxResultsCount, (2 * maxResultsCount) + 1);
		m_resultsCountEstimate = matches.get_matches_estimated();
		if (matches.empty() == false)
		{
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: found " << matches.size() << "/" << maxResultsCount
				<< " results found from position " << startDoc << endl;
			cout << "XapianEngine::queryDatabase: estimated " << matches.get_matches_lower_bound()
				<< "/" << m_resultsCountEstimate << "/" << matches.get_matches_upper_bound() << endl;
#endif

			// Get the results
			for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
			{
				Xapian::docid docId = *mIter;
				Xapian::Document doc(mIter.get_document());

				// What terms did this document match ?
				seedTerms.clear();
				for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId);
					termIter != enquire.get_matching_terms_end(docId); ++termIter)
				{
					char firstChar = (*termIter)[0];

					if (isupper(((int)firstChar)) == 0)
					{
						seedTerms.push_back(*termIter);
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: matched term " << *termIter << endl;
#endif
					}
					else if (firstChar == 'Z')
					{
						string stemmed((*termIter).substr(1));
						string::size_type stemmedLen = stemmed.length();

						// Which of this document's terms stem to this ?
						Xapian::TermIterator docTermIter = pIndex->termlist_begin(docId);
						if (docTermIter != pIndex->termlist_end(docId))
						{
							for (docTermIter.skip_to(stemmed);
								docTermIter != pIndex->termlist_end(docId); ++docTermIter)
							{
								// Is this a potential unstem ?
								if (strncasecmp((*docTermIter).c_str(), stemmed.c_str(), stemmedLen) != 0)
								{
									// No, no point looking at the next terms
									break;
								}
#ifdef DEBUG
								cout << "XapianEngine::queryDatabase: matched unstem " << *docTermIter << endl;
#endif

								// FIXME: check this term stems to stemmed !
								seedTerms.push_back(*docTermIter); 
							}
						}
					}
				}

				DocumentInfo thisResult;
				thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms));
				thisResult.setScore((float)mIter.get_percent());

#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: found document ID " << docId << endl;
#endif
				XapianDatabase::recordToProps(doc.get_data(), &thisResult);
				// XapianDatabase stored the language in English
				thisResult.setLanguage(Languages::toLocale(thisResult.getLanguage()));

				string url(thisResult.getLocation());
				if (url.empty() == true)
				{
					// Hmmm this shouldn't be empty...
					// Use this instead, even though the document isn't cached in the index
					thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId));
				}

				// We don't know the index ID, just the document ID
				thisResult.setIsIndexed(0, docId);

				// Add this result
				m_resultsList.push_back(thisResult);
			}
		}

		completedQuery = true;
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	cout << "Ran query \"" << queryProps.getFreeQuery() << "\" in " << timer.stop() << " ms" << endl;

	try
	{
		m_expandTerms.clear();

		// Expand the query ?
		if (m_expandDocuments.empty() == false)
		{
			Xapian::RSet expandDocs;

			for (set<string>::const_iterator docIter = m_expandDocuments.begin();
				docIter != m_expandDocuments.end(); ++docIter)
			{
				string uniqueTerm(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(Url::canonicalizeUrl(*docIter)), true));

				// Only one document may have this term
				Xapian::PostingIterator postingIter = pIndex->postlist_begin(uniqueTerm);
				if (postingIter != pIndex->postlist_end(uniqueTerm))
				{
					expandDocs.add_document(*postingIter);
				}
			}
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: expand from " << expandDocs.size() << " documents" << endl;
#endif

			// Get 10 non-prefixed terms
			string allowedPrefixes("RS");
			TermDecider expandDecider(pIndex, ((stemLanguage.empty() == true) ? NULL : &m_stemmer),
				FileStopper::get_stopper(Languages::toCode(stemLanguage)),
				allowedPrefixes, query);
			Xapian::ESet expandTerms = enquire.get_eset(10, expandDocs, &expandDecider);
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: " << expandTerms.size() << " expand terms" << endl;
#endif
			for (Xapian::ESetIterator termIter = expandTerms.begin();
				termIter != expandTerms.end(); ++termIter)
			{
				string expandTerm(*termIter);
				char firstChar = expandTerm[0];

				// Is this prefixed ?
				if (allowedPrefixes.find(firstChar) != string::npos)
				{
					expandTerm.erase(0, 1);
				}

				m_expandTerms.insert(expandTerm);
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't run query: " << error.get_type() << ": " << error.get_msg() << endl;
	}

	// Be tolerant of errors as long as we got some results
	if ((completedQuery == true) ||
		(m_resultsList.empty() == false))
	{
		return true;
	}

	return false;
}
Ejemplo n.º 6
0
bool XapianEngine::queryDatabase(Xapian::Database *pIndex, Xapian::Query &query)
{
	bool completedQuery = false;

	if (pIndex == NULL)
	{
		return false;
	}

	// Start an enquire session on the database
	Xapian::Enquire enquire(*pIndex);

	try
	{
		AbstractGenerator abstractGen(pIndex, 50);
		vector<string> seedTerms;

		// Give the query object to the enquire session
		enquire.set_query(query);

		// Get the top results of the query
		Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
		if (matches.empty() == false)
		{
			// Get the results
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated()
				<< "/" << m_maxResultsCount << " results found" << endl;
#endif
			for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
			{
				Xapian::docid docId = *mIter;
				Xapian::Document doc(mIter.get_document());

				// What terms did this document match ?
				seedTerms.clear();
				for (Xapian::TermIterator termIter = enquire.get_matching_terms_begin(docId);
					termIter != enquire.get_matching_terms_end(docId); ++termIter)
				{
					seedTerms.push_back(*termIter);
				}

				DocumentInfo thisResult;
				thisResult.setExtract(abstractGen.generateAbstract(docId, seedTerms));
				thisResult.setScore((float)mIter.get_percent());

#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: found document ID " << docId << endl;
#endif
				XapianDatabase::recordToProps(doc.get_data(), &thisResult);

				string url(thisResult.getLocation());
				if (url.empty() == true)
				{
					// Hmmm this shouldn't be empty...
					// Use this instead, even though the document isn't cached in the index
					thisResult.setLocation(XapianDatabase::buildUrl(m_databaseName, docId));
				}

				// We don't know the index ID, just the document ID
				thisResult.setIsIndexed(0, docId);

				// Add this result
				m_resultsList.push_back(thisResult);
			}
		}

		completedQuery = true;
	}
	catch (const Xapian::Error &error)
	{
		cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
	}

	try
	{
		m_expandTerms.clear();

		// Expand the query ?
		if (m_relevantDocuments.empty() == false)
		{
			Xapian::RSet relevantDocs;
			unsigned int count = 0;

			for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
				docIter != m_relevantDocuments.end(); ++docIter)
			{
				relevantDocs.add_document(*docIter);
			}

			// Get 10 non-prefixed terms
			Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
			for (Xapian::ESetIterator termIter = expandTerms.begin();
				(termIter != expandTerms.end()) && (count < 10); ++termIter)
			{
				if (isupper((int)((*termIter)[0])) == 0)
				{
					m_expandTerms.insert(*termIter);
					++count;
				}
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
	}

	// Be tolerant of errors as long as we got some results
	if ((completedQuery == true) ||
		(m_resultsList.empty() == false))
	{
		return true;
	}

	return false;
}
Ejemplo n.º 7
0
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);

			// Get the results
#ifdef DEBUG
			cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif
			for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
			{
				// Get the document data
				string record = mIter.get_document().get_data();

				// Get the title
				string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
				// Get the URL
				string url = StringManip::extractField(record, "url=", "\n");
				if (url.empty() == true)
				{
					// Hmmm this shouldn't be empty...
					// Use this instead, even though the document isn't cached in the index
					url = buildUrl(m_databaseName, *mIter);
				}
				else
				{
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
					url = Url::canonicalizeUrl(url);
				}

				// Get the summary and the type
				string summary = StringManip::extractField(record, "sample=", "\n");
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: found omindex summary " << summary << endl;
#endif
				string type = StringManip::extractField(record, "type=", "\n");
				// ...and finally the language, if available
				string language = StringManip::extractField(record, "language=", "\n");

				// Add this result
				Result thisResult(url, title, summary, language, (float)mIter.get_percent());
				m_resultsList.push_back(thisResult);
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cout << "XapianEngine::queryDatabase: couldn't run query: "  << error.get_msg() << endl;
		}
	}
	pDatabase->unlock();

	return bStatus;
}
Ejemplo n.º 8
0
    void QueryHandler(const QueryMessage &message, const Theron::Address from)
        {
            search::QueryInfo qi=*(message.query);
            std::string resKey(message.resKey);
            delete message.query;
            std::string segString;
            char *output=new char[qi.query.length()*9];
            char *input=new char[qi.query.length()*3];
            memset(output,0,qi.query.length()*9);
            memset(input,0,qi.query.length()*3);
            try 
            {
                UErrorCode  error = U_ZERO_ERROR;
                ucnv_convert("GBK","UTF-8",input,  qi.query.length()*3, qi.query.c_str(), qi.query.length(), &error );
                
                
                bool ret = result->ParagraphProcessing(input, output);
                if (ret)
                {
                    int oLen=strlen(output);
                    char *utf8out=new char[oLen*3];
                    memset(utf8out,0,oLen*3);
                    ucnv_convert("UTF-8","GBK",utf8out,  oLen*3, output, oLen, &error );
                    
                    segString=std::string(utf8out);
                    delete [] utf8out;
                }
            }
            catch (...) {
            }
            delete [] output;
            delete [] input;
            std::list<std::string> segList;
            if(segString.length()>0)
            {
                std::vector<std::string> resv;
                boost::algorithm::split( resv, segString, boost::algorithm::is_any_of(" ") );
                for(std::vector<std::string>::iterator it=resv.begin();it!=resv.end();++it)
                {
                    std::vector<std::string> tmpv;
                    boost::algorithm::split( tmpv, *it, boost::algorithm::is_any_of("/") );
                    if(tmpv.size()>1&&tmpv[1]!="w")
                        segList.push_back(std::string("K")+tmpv[0]);
                }
            }
            search::DocList *dList=new search::DocList();
            if(segList.size()>0)
            {
                Xapian::Query query(Xapian::Query::OP_AND,segList.begin(), segList.end());
                
                while(1)
                {
                    try
                    {
                        db.reopen();
                        Xapian::Enquire  enquire(db);
                        enquire.set_query(query);
                        Xapian::MSet matches = enquire.get_mset(0, 100);
                        for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
                            Xapian::Document doc = i.get_document();
                            search::IndexInfo info;
                            info.uid=doc.get_value(1);
                            info.attMap.insert(std::make_pair(std::string("title"),doc.get_value(2)));
                            info.content=doc.get_data();
                            dList->docList.push_back(info);
                        }
                        std::cout<<"doc size:"<<dList->docList.size()<<std::endl;
                        break;
                    }catch(Xapian::DatabaseModifiedError exception)
                    {
                        std::cout<<"try agian"<<std::endl;
                    }catch(...)
                    {
                        break;
                    }
                    
                }
                

            }
            Send(QueryResponceMessage(dList,resKey.c_str()), from);
            
        }
Ejemplo n.º 9
0
bool XapianEngine::queryDatabase(Xapian::Query &query)
{
	bool bStatus = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, true);
	if (pDatabase == NULL)
	{
		return false;
	}

	// Get the latest revision...
	pDatabase->reopen();
	Xapian::Database *pIndex = pDatabase->readLock();
	if (pIndex != NULL)
	{
		try
		{
			// Start an enquire session on the database
			Xapian::Enquire enquire(*pIndex);

			// Give the query object to the enquire session
			enquire.set_query(query);

			// Get the top results of the query
			Xapian::MSet matches = enquire.get_mset(0, m_maxResultsCount);
			if (matches.empty() == false)
			{
				multimap<Xapian::weight, string> queryTerms;
				vector<string> seedTerms;
				Xapian::weight maxWeight = matches.get_max_attained();

				// Sort query terms by weight
				for (Xapian::TermIterator termIter = query.get_terms_begin();
					termIter != query.get_terms_end(); ++termIter)
				{
					string termName(*termIter);
					Xapian::weight termWeight = maxWeight - matches.get_termweight(termName);

					queryTerms.insert(pair<Xapian::weight, string>(termWeight, termName));
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: term " << termName
						<< " has weight " << matches.get_termweight(termName) << "/" << maxWeight << endl;
#endif
				}

				for (multimap<Xapian::weight, string>::iterator weightIter = queryTerms.begin();
					weightIter != queryTerms.end(); ++weightIter)
				{
					seedTerms.push_back(weightIter->second);
				}

				// Get the results
#ifdef DEBUG
				cout << "XapianEngine::queryDatabase: " << matches.get_matches_estimated() << "/" << m_maxResultsCount << " results found" << endl;
#endif
				for (Xapian::MSetIterator mIter = matches.begin(); mIter != matches.end(); ++mIter)
				{
					Xapian::docid docId = *mIter;
					Xapian::Document doc(mIter.get_document());
					string record = doc.get_data();

					// Get the title
					string title = StringManip::extractField(record, "caption=", "\n");
#ifdef DEBUG
					cout << "XapianEngine::queryDatabase: found omindex title " << title << endl;
#endif
					// Get the URL
					string url = StringManip::extractField(record, "url=", "\n");
					if (url.empty() == true)
					{
						// Hmmm this shouldn't be empty...
						// Use this instead, even though the document isn't cached in the index
						url = XapianDatabase::buildUrl(m_databaseName, *mIter);
					}
					else
					{
#ifdef DEBUG
						cout << "XapianEngine::queryDatabase: found omindex URL " << url << endl;
#endif
						url = Url::canonicalizeUrl(url);
					}

					// Get the type
					string type = StringManip::extractField(record, "type=", "\n");
					// ...and the language, if available
					string language = StringManip::extractField(record, "language=", "\n");

					// Finally, get a summary
					string summary = StringManip::extractField(record, "sample=", "\n");
					if (summary.empty() == true)
					{
						AbstractGenerator abstractGen(pIndex, 50);

						// Generate an abstract based on the query's terms
						summary = abstractGen.generateAbstract(seedTerms, docId);
					}

					// Add this result
					Result thisResult(url, title, summary, language, (float)mIter.get_percent());
					m_resultsList.push_back(thisResult);
				}
			}

			m_expandTerms.clear();

			// Expand the query ?
			if (m_relevantDocuments.empty() == false)
			{
				Xapian::RSet relevantDocs;
				unsigned int count = 0;

				for (set<unsigned int>::const_iterator docIter = m_relevantDocuments.begin();
					docIter != m_relevantDocuments.end(); ++docIter)
				{
					relevantDocs.add_document(*docIter);
				}

				// Get 10 non-prefixed terms
				Xapian::ESet expandTerms = enquire.get_eset(20, relevantDocs);
				for (Xapian::ESetIterator termIter = expandTerms.begin();
					(termIter != expandTerms.end()) && (count < 10); ++termIter)
				{
					if (isupper((int)((*termIter)[0])) == 0)
					{
						m_expandTerms.insert(*termIter);
						++count;
					}
				}
			}

			bStatus = true;
		}
		catch (const Xapian::Error &error)
		{
			cerr << "XapianEngine::queryDatabase: " << error.get_type() << ": " << error.get_msg() << endl;
		}
	}
	pDatabase->unlock();

	return bStatus;
}
Ejemplo n.º 10
0
/** Main routine */
int main(int argc,char **argv)
{
  // process inputs that were passed to us via QUERY_STRING
  std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n";
  std::string callback;
  try
  {
    // get input parameters
    const char *queryEnv = getenv("QUERY_STRING");
    std::string queryString;
    if (queryEnv)
    {
      queryString = queryEnv;
    }
    else if (argc>=2)
    {
      queryString = argv[1];
    }
    else
    {
      std::cout << "No input!\n";
      exit(1);
    }

    // parse query string
    std::vector<std::string> parts = split(queryString,'&');
    std::string searchFor,callback;
    int num=1,page=0;
    for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it)
    {
      std::vector<std::string> kv = split(*it,'=');
      if (kv.size()==2)
      {
        std::string val = uriDecode(kv[1]);
        if      (kv[0]=="q")  searchFor = val; 
        else if (kv[0]=="n")  num       = fromString<int>(val);
        else if (kv[0]=="p")  page      = fromString<int>(val);
        else if (kv[0]=="cb") callback  = val;
      }
    }

    std::string indexDir = "doxysearch.db";

    if (queryString=="test") // user test
    {
      bool dbOk = dirExists(indexDir);
      if (dbOk)
      {
        std::cout << "Test successful.";
      }
      else
      {
        std::cout << "Test failed: cannot find search index " << indexDir;
      }
      exit(0);
    }

    // create query
    Xapian::Database db(indexDir);
    Xapian::Enquire enquire(db);
    Xapian::Query query;
    std::vector<std::string> words = split(searchFor,' ');
    for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it)
    {
      query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it));
    }
    enquire.set_query(query);

    // get results
    Xapian::MSet matches = enquire.get_mset(page*num,num);
    unsigned int hits    = matches.get_matches_estimated();
    unsigned int offset  = page*num;
    unsigned int pages   = num>0 ? (hits+num-1)/num : 0;
    if (offset>hits)     offset=hits;
    if (offset+num>hits) num=hits-offset;

    // write results as JSONP
    std::cout << callback.c_str() << "(";
    std::cout << "{" << std::endl 
              << "  \"hits\":"   << hits   << "," << std::endl
              << "  \"first\":"  << offset << "," << std::endl
              << "  \"count\":"  << num    << "," << std::endl
              << "  \"page\":"   << page   << "," << std::endl
              << "  \"pages\":"  << pages  << "," << std::endl
              << "  \"query\": \""  << escapeString(searchFor)  << "\"," << std::endl
              << "  \"items\":[" << std::endl;
    // foreach search result
    unsigned int o = offset;
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o) 
    {
      std::vector<Fragment> hl;
      Xapian::Document doc = i.get_document();
      highlighter(doc.get_value(FIELD_DOC),words,hl);
      std::cout << "  {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl
                << "   \"name\": \"" << doc.get_value(FIELD_NAME) << doc.get_value(FIELD_ARGS) << "\"," << std::endl
                << "   \"tag\": \""  << doc.get_value(FIELD_TAG) << "\"," << std::endl
                << "   \"url\": \""  << doc.get_value(FIELD_URL) << "\"," << std::endl;
      std::cout << "   \"fragments\":[" << std::endl;
      int c=0;
      bool first=true;
      for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c)
      {
        if (!first) std::cout << "," << std::endl;
        std::cout << "     \"" << escapeString((*it).text) << "\"";
        first=false;
      }
      if (!first) std::cout << std::endl;
      std::cout << "   ]" << std::endl;
      std::cout << "  }";
      if (o<offset+num-1) std::cout << ",";
      std::cout << std::endl;
    }
    std::cout << " ]" << std::endl << "})" << std::endl;
  } 
  catch (const Xapian::Error &e) // Xapian exception
  {
    showError(callback,e.get_description());
  } 
  catch (...) // Any other exception
  {
    showError(callback,"Unknown Exception!");
    exit(1);
  }
  return 0;
}
Ejemplo n.º 11
0
int main(int argc, char **argv)
{
    if(argc < 2) {
        usage(argv);
        return 1;
    }

    try {
        char *action = argv[1];
        char *db_path = argv[2];

        if(!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);

            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            std::string doc_txt;
            while(true) {
                if(std::cin.eof()) break;

                std::string line;
                getline(std::cin, line);
                doc_txt += line;
            }

            if(!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);

                indexer.set_document(doc);
                indexer.index_text(doc_txt);

                db.add_document(doc);

                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }

            db.commit();
        } else if(!strcmp(action, "search")) {
            if(argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            std::string query_str = argv[3];
            argv+= 4;
            while(*argv) {
                query_str += ' ';
                query_str += *argv++;
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                         std::endl;

            enquire.set_query(query);
            Xapian::MSet matches = enquire.get_mset(0, 10);

            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;

            for (Xapian::MSetIterator i = matches.begin();
                    i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                        "% docid=" << *i << " [" <<
                        i.get_document().get_data()<< "]" << std::endl <<
                        std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }

    } catch (const Xapian::Error &error) {
        std::cout << "Exception: " << error.get_msg() << std::endl;
    }
}