void HistoryLogger::saveMessage(const Message* message) { if (message->flags() & MESSAGE_FLAG_ALARM) return; Xapian::Document doc; quint32 flags = message->flags(); std::string plainText(message->plainText().toUtf8()); std::string confUser(message->getConfUser().constData()); std::string data; if (flags & MESSAGE_FLAG_RTF) data = message->rtfText().constData(); else data = plainText; std::cout << "HistoryLogger::saveMessage data = " << data << std::endl; doc.set_data(data); Xapian::TermGenerator termGen; termGen.set_stemmer(Xapian::Stem("ru")); termGen.set_document(doc); termGen.index_text(plainText); doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString()); doc.add_value(1, message->dateTime().toString("hhmmss").toStdString()); doc.add_value(2, QString::number(flags, 16).toStdString()); doc.add_value(3, message->type() == Message::Outgoing? "o" : "i"); doc.add_value(4, confUser); database->add_document(doc); database->flush(); }
/**
 * Indexes the CSV file at @p datapath into the Xapian database at @p dbpath.
 *
 * The database is created if it does not exist yet. The first CSV line must be
 * a header whose columns 0, 2 and 8 are "id_NUMBER", "TITLE" and
 * "DESCRIPTION". Every following row becomes one document that is keyed by the
 * boolean term "Q" + identifier, so re-running the indexer replaces documents
 * rather than duplicating them.
 *
 * The process is terminated with exit(1) when the CSV file cannot be opened,
 * when it has no header line, or when the header does not match the expected
 * layout.
 *
 * @param datapath path of the CSV file to read.
 * @param dbpath   path of the Xapian database to create or open.
 * @throws std::out_of_range (from vector::at) when a line has fewer columns
 *         than the hard-coded offsets require. Xapian calls may additionally
 *         throw their own exceptions.
 */
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    // Report an unreadable input file explicitly instead of letting it show
    // up later as an out-of-range access on an empty header.
    ifstream csv(datapath.c_str());
    if (!csv.is_open()) {
        cerr << "Failed to open CSV file: " << datapath << endl;
        exit(1);
    }

    vector<string> fields;
    if (!csv_parse_line(csv, fields)) {
        cerr << "CSV header line is missing!" << endl;
        exit(1);
    }

    // Check the CSV header line matches our hard-coded offsets.
    if (fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
        fields.at(FIELD_TITLE) != "TITLE" ||
        fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
        // The CSV format doesn't match what we expect.
        cerr << "CSV format has changed!" << endl;
        exit(1);
    }

    while (csv_parse_line(csv, fields)) {
        // 'fields' is a vector mapping from field number to value.
        // We look up fields with the 'at' method so we get an exception
        // if that field isn't set.
        //
        // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
        const string &description = fields.at(FIELD_DESCRIPTION);
        const string &title = fields.at(FIELD_TITLE);
        const string &identifier = fields.at(FIELD_ID_NUMBER);

        // We make a document and tell the term generator to use this.
        Xapian::Document doc;
        termgenerator.set_document(doc);

        // Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, "S");
        termgenerator.index_text(description, 1, "XD");

        // Index fields without prefixes for general search.
        termgenerator.index_text(title);
        termgenerator.increase_termpos();
        termgenerator.index_text(description);

        // Store all the fields for display purposes.
        doc.set_data(identifier + "\n" + title + "\n" + description);

        // We use the identifier to ensure each object ends up in the
        // database only once no matter how many times we run the
        // indexer.
        const string idterm = "Q" + identifier;
        doc.add_boolean_term(idterm);
        db.replace_document(idterm, doc);
    }

    // Commit explicitly: a commit left to the WritableDatabase destructor
    // cannot report a failure to the caller.
    db.commit();
}
int main(int argc, char **argv) { if(argc < 2) { usage(argv); return 1; } try { char *action = argv[1]; char *db_path = argv[2]; if(!strcmp(action, "index")) { Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN); Xapian::TermGenerator indexer; Xapian::Stem stemmer("english"); indexer.set_stemmer(stemmer); std::string doc_txt; while(true) { if(std::cin.eof()) break; std::string line; getline(std::cin, line); doc_txt += line; } if(!doc_txt.empty()) { Xapian::Document doc; doc.set_data(doc_txt); indexer.set_document(doc); indexer.index_text(doc_txt); db.add_document(doc); std::cout << "Indexed: " << indexer.get_description() << std::endl; } db.commit(); } else if(!strcmp(action, "search")) { if(argc < 4) { std::cerr << "You must supply a query string" << std::endl; return 1; } Xapian::Database db(db_path); Xapian::Enquire enquire(db); std::string query_str = argv[3]; argv+= 4; while(*argv) { query_str += ' '; query_str += *argv++; } Xapian::QueryParser qp; Xapian::Stem stemmer("english"); qp.set_stemmer(stemmer); qp.set_database(db); qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); Xapian::Query query = qp.parse_query(query_str); std::cout << "Parsed query is: " << query.get_description() << std::endl; enquire.set_query(query); Xapian::MSet matches = enquire.get_mset(0, 10); std::cout << matches.get_matches_estimated() << " results found.\n"; std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl; for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" << *i << " [" << i.get_document().get_data()<< "]" << std::endl << std::endl; } } else { std::cerr << "Invalid action " << action << std::endl; usage(argv); return 1; } } catch (const Xapian::Error &error) { std::cout << "Exception: " << error.get_msg() << std::endl; } }