Esempio n. 1
0
void DataSystem::query(const std::string q)
{
	try {
		// Start an enquire session.
		Xapian::Enquire enquire(db);

		Xapian::Query query = qp.parse_query(q);
		printf("Parsed query is: %s", query.get_description().c_str());

		// Find the top 10 results for the query.
		enquire.set_query(query);
		Xapian::MSet matches = enquire.get_mset(0, 10);

		// Display the results.
		printf("%d results found.\n", matches.get_matches_estimated());
		printf("Matches 1-$d:\n", matches.size());

		for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
	{
			printf("%d: %d\% docid= [%s]\n\n",
				   i.get_rank() + 1,
				   i.get_percent(),
				   i.get_document().get_data().c_str());
		}
	} catch (const Xapian::Error &e)
	{
		printf("DataSystem, query error: %s", e.get_description());
	}
}
Esempio n. 2
0
    void QueryHandler(const QueryMessage &message, const Theron::Address from)
        {
            search::QueryInfo qi=*(message.query);
            std::string resKey(message.resKey);
            delete message.query;
            std::string segString;
            char *output=new char[qi.query.length()*9];
            char *input=new char[qi.query.length()*3];
            memset(output,0,qi.query.length()*9);
            memset(input,0,qi.query.length()*3);
            try 
            {
                UErrorCode  error = U_ZERO_ERROR;
                ucnv_convert("GBK","UTF-8",input,  qi.query.length()*3, qi.query.c_str(), qi.query.length(), &error );
                
                
                bool ret = result->ParagraphProcessing(input, output);
                if (ret)
                {
                    int oLen=strlen(output);
                    char *utf8out=new char[oLen*3];
                    memset(utf8out,0,oLen*3);
                    ucnv_convert("UTF-8","GBK",utf8out,  oLen*3, output, oLen, &error );
                    
                    segString=std::string(utf8out);
                    delete [] utf8out;
                }
            }
            catch (...) {
            }
            delete [] output;
            delete [] input;
            std::list<std::string> segList;
            if(segString.length()>0)
            {
                std::vector<std::string> resv;
                boost::algorithm::split( resv, segString, boost::algorithm::is_any_of(" ") );
                for(std::vector<std::string>::iterator it=resv.begin();it!=resv.end();++it)
                {
                    std::vector<std::string> tmpv;
                    boost::algorithm::split( tmpv, *it, boost::algorithm::is_any_of("/") );
                    if(tmpv.size()>1&&tmpv[1]!="w")
                        segList.push_back(std::string("K")+tmpv[0]);
                }
            }
            search::DocList *dList=new search::DocList();
            if(segList.size()>0)
            {
                Xapian::Query query(Xapian::Query::OP_AND,segList.begin(), segList.end());
                
                while(1)
                {
                    try
                    {
                        db.reopen();
                        Xapian::Enquire  enquire(db);
                        enquire.set_query(query);
                        Xapian::MSet matches = enquire.get_mset(0, 100);
                        for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
                            Xapian::Document doc = i.get_document();
                            search::IndexInfo info;
                            info.uid=doc.get_value(1);
                            info.attMap.insert(std::make_pair(std::string("title"),doc.get_value(2)));
                            info.content=doc.get_data();
                            dList->docList.push_back(info);
                        }
                        std::cout<<"doc size:"<<dList->docList.size()<<std::endl;
                        break;
                    }catch(Xapian::DatabaseModifiedError exception)
                    {
                        std::cout<<"try agian"<<std::endl;
                    }catch(...)
                    {
                        break;
                    }
                    
                }
                

            }
            Send(QueryResponceMessage(dList,resKey.c_str()), from);
            
        }
			void query(const std::vector<std::string>& arr_queries, const std::vector<std::string>& arr_selection = {}) {
				
				Xapian::Database databases_ir;
				try {
					Xapian::Database database_ir_object_values(path_database+"object_values/");
					databases_ir.add_database(database_ir_object_values);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_descriptions(path_database+"object_descriptions/");
					databases_ir.add_database(database_ir_object_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				try {
					Xapian::Database database_ir_object_sub_descriptions(path_database+"object_sub_descriptions/");
					databases_ir.add_database(database_ir_object_sub_descriptions);
				} catch (const Xapian::Error &e) {
					// Database not ready
				}
				
				// Filter on Type IDs
				
				Xapian::Query query_ir_identifiers;
				
				if (!arr_selection.empty()) {
					
					std::vector<Xapian::Query> arr_query_identifiers;
					
					for (const auto& str_identifier_field : arr_selection) {

						arr_query_identifiers.push_back(Xapian::Query("T"+str_identifier_field));
					}
					
					query_ir_identifiers = Xapian::Query(Xapian::Query::OP_OR, arr_query_identifiers.begin(), arr_query_identifiers.end());
				}

				Xapian::QueryParser queryparser;
				queryparser.set_database(databases_ir); // Needed to enable specific query flags
				queryparser.set_stemmer(Xapian::Stem("en"));
				queryparser.set_stemming_strategy(queryparser.STEM_SOME);
				queryparser.add_boolean_prefix("identifier", "T");
				//queryparser.add_prefix("value", "SV");
				
				unsigned int count_queries = 0;
				
				for (const auto& str_query : arr_queries) {
					
					const auto query_id = count_queries;
					count_queries++;
						
					Xapian::Query query_ir = queryparser.parse_query(str_query, Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_WILDCARD);
					
					if (!arr_selection.empty()) {
						
						// Update main query
						
						query_ir = Xapian::Query(Xapian::Query::OP_FILTER, query_ir, query_ir_identifiers);
					}
					
					// Run query
					
					Xapian::Enquire enquire(databases_ir);
					enquire.set_query(query_ir);
					
					Xapian::MSet arr_msets = enquire.get_mset(num_offset, num_limit);

					for (Xapian::MSetIterator iterate_arr_mset = arr_msets.begin(); iterate_arr_mset != arr_msets.end(); iterate_arr_mset++) {
											
						//Xapian::docid did = *iterate_arr_mset;
						const int unsigned& nr_rank = iterate_arr_mset.get_rank();
						const int unsigned& nr_weight = iterate_arr_mset.get_weight();
						
						const Xapian::Document doc = iterate_arr_mset.get_document();
						const std::string& str_identifier = doc.get_value(0);
						
						if (map_query_results.find(str_identifier) == map_query_results.end()) {
							
							std::vector<unsigned int> arr_matches;
							
							arr_matches.push_back(query_id);

							const std::string& str_value = (include_value ? doc.get_data() : "");
							
							map_query_results[str_identifier] = std::make_tuple(nr_rank, nr_weight, arr_matches, str_value);
						} else {
							
							type_arr_query_result& arr_query_result = map_query_results[str_identifier];
							
							std::get<0>(arr_query_result) += nr_rank;
							std::get<1>(arr_query_result) += nr_weight;
							
							std::get<2>(arr_query_result).push_back(query_id);
						}
					}
				}
			}
Esempio n. 4
0
/** Main routine */
int main(int argc,char **argv)
{
  // process inputs that were passed to us via QUERY_STRING
  std::cout << "Content-Type:application/javascript;charset=utf-8\r\n\n";
  std::string callback;
  try
  {
    // get input parameters
    const char *queryEnv = getenv("QUERY_STRING");
    std::string queryString;
    if (queryEnv)
    {
      queryString = queryEnv;
    }
    else if (argc>=2)
    {
      queryString = argv[1];
    }
    else
    {
      std::cout << "No input!\n";
      exit(1);
    }

    // parse query string
    std::vector<std::string> parts = split(queryString,'&');
    std::string searchFor,callback;
    int num=1,page=0;
    for (std::vector<std::string>::const_iterator it=parts.begin();it!=parts.end();++it)
    {
      std::vector<std::string> kv = split(*it,'=');
      if (kv.size()==2)
      {
        std::string val = uriDecode(kv[1]);
        if      (kv[0]=="q")  searchFor = val; 
        else if (kv[0]=="n")  num       = fromString<int>(val);
        else if (kv[0]=="p")  page      = fromString<int>(val);
        else if (kv[0]=="cb") callback  = val;
      }
    }

    std::string indexDir = "doxysearch.db";

    if (queryString=="test") // user test
    {
      bool dbOk = dirExists(indexDir);
      if (dbOk)
      {
        std::cout << "Test successful.";
      }
      else
      {
        std::cout << "Test failed: cannot find search index " << indexDir;
      }
      exit(0);
    }

    // create query
    Xapian::Database db(indexDir);
    Xapian::Enquire enquire(db);
    Xapian::Query query;
    std::vector<std::string> words = split(searchFor,' ');
    for (std::vector<std::string>::const_iterator it=words.begin();it!=words.end();++it)
    {
      query = Xapian::Query(Xapian::Query::OP_OR,query,Xapian::Query(*it));
    }
    enquire.set_query(query);

    // get results
    Xapian::MSet matches = enquire.get_mset(page*num,num);
    unsigned int hits    = matches.get_matches_estimated();
    unsigned int offset  = page*num;
    unsigned int pages   = num>0 ? (hits+num-1)/num : 0;
    if (offset>hits)     offset=hits;
    if (offset+num>hits) num=hits-offset;

    // write results as JSONP
    std::cout << callback.c_str() << "(";
    std::cout << "{" << std::endl 
              << "  \"hits\":"   << hits   << "," << std::endl
              << "  \"first\":"  << offset << "," << std::endl
              << "  \"count\":"  << num    << "," << std::endl
              << "  \"page\":"   << page   << "," << std::endl
              << "  \"pages\":"  << pages  << "," << std::endl
              << "  \"query\": \""  << escapeString(searchFor)  << "\"," << std::endl
              << "  \"items\":[" << std::endl;
    // foreach search result
    unsigned int o = offset;
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i,++o) 
    {
      std::vector<Fragment> hl;
      Xapian::Document doc = i.get_document();
      highlighter(doc.get_value(FIELD_DOC),words,hl);
      std::cout << "  {\"type\": \"" << doc.get_value(FIELD_TYPE) << "\"," << std::endl
                << "   \"name\": \"" << doc.get_value(FIELD_NAME) << doc.get_value(FIELD_ARGS) << "\"," << std::endl
                << "   \"tag\": \""  << doc.get_value(FIELD_TAG) << "\"," << std::endl
                << "   \"url\": \""  << doc.get_value(FIELD_URL) << "\"," << std::endl;
      std::cout << "   \"fragments\":[" << std::endl;
      int c=0;
      bool first=true;
      for (std::vector<Fragment>::const_iterator it = hl.begin();it!=hl.end() && c<3;++it,++c)
      {
        if (!first) std::cout << "," << std::endl;
        std::cout << "     \"" << escapeString((*it).text) << "\"";
        first=false;
      }
      if (!first) std::cout << std::endl;
      std::cout << "   ]" << std::endl;
      std::cout << "  }";
      if (o<offset+num-1) std::cout << ",";
      std::cout << std::endl;
    }
    std::cout << " ]" << std::endl << "})" << std::endl;
  } 
  catch (const Xapian::Error &e) // Xapian exception
  {
    showError(callback,e.get_description());
  } 
  catch (...) // Any other exception
  {
    showError(callback,"Unknown Exception!");
    exit(1);
  }
  return 0;
}
Esempio n. 5
0
int main(int argc, char **argv)
{
    if(argc < 2) {
        usage(argv);
        return 1;
    }

    try {
        char *action = argv[1];
        char *db_path = argv[2];

        if(!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);

            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            std::string doc_txt;
            while(true) {
                if(std::cin.eof()) break;

                std::string line;
                getline(std::cin, line);
                doc_txt += line;
            }

            if(!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);

                indexer.set_document(doc);
                indexer.index_text(doc_txt);

                db.add_document(doc);

                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }

            db.commit();
        } else if(!strcmp(action, "search")) {
            if(argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            std::string query_str = argv[3];
            argv+= 4;
            while(*argv) {
                query_str += ' ';
                query_str += *argv++;
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                         std::endl;

            enquire.set_query(query);
            Xapian::MSet matches = enquire.get_mset(0, 10);

            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;

            for (Xapian::MSetIterator i = matches.begin();
                    i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                        "% docid=" << *i << " [" <<
                        i.get_document().get_data()<< "]" << std::endl <<
                        std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }

    } catch (const Xapian::Error &error) {
        std::cout << "Exception: " << error.get_msg() << std::endl;
    }
}