Beispiel #1
0
void HistoryLogger::saveMessage(const Message* message)
{
	if (message->flags() & MESSAGE_FLAG_ALARM)
		return;
	
	Xapian::Document doc;
	
	quint32 flags = message->flags();
	std::string plainText(message->plainText().toUtf8());
	std::string confUser(message->getConfUser().constData());

	std::string data;
	if (flags & MESSAGE_FLAG_RTF)
		data = message->rtfText().constData();
	else
		data = plainText;

	std::cout << "HistoryLogger::saveMessage data = " << data << std::endl;
	doc.set_data(data);

	Xapian::TermGenerator termGen;
	termGen.set_stemmer(Xapian::Stem("ru"));
	termGen.set_document(doc);
	termGen.index_text(plainText);

	doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString());
	doc.add_value(1, message->dateTime().toString("hhmmss").toStdString());
	doc.add_value(2, QString::number(flags, 16).toStdString());
	doc.add_value(3, message->type() == Message::Outgoing? "o" : "i");
	doc.add_value(4, confUser);

	database->add_document(doc);
	database->flush();
}
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    vector<string> fields;
    csv_parse_line(csv, fields);

    // Check the CSV header line matches our hard-code offsets.
    if (fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
    fields.at(FIELD_TITLE) != "TITLE" ||
    fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
    // The CSV format doesn't match what we expect.
    cerr << "CSV format has changed!" << endl;
    exit(1);
    }

    while (csv_parse_line(csv, fields)) {
    // 'fields' is a vector mapping from field number to value.
    // We look up fields with the 'at' method so we get an exception
    // if that field isn't set.
    //
    // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
    const string & description = fields.at(FIELD_DESCRIPTION);
    const string & title = fields.at(FIELD_TITLE);
    const string & identifier = fields.at(FIELD_ID_NUMBER);

    // We make a document and tell the term generator to use this.
    Xapian::Document doc;
    termgenerator.set_document(doc);

    // Index each field with a suitable prefix.
    termgenerator.index_text(title, 1, "S");
    termgenerator.index_text(description, 1, "XD");

    // Index fields without prefixes for general search.
    termgenerator.index_text(title);
    termgenerator.increase_termpos();
    termgenerator.index_text(description);

    // Store all the fields for display purposes.
    doc.set_data(identifier + "\n" + title + "\n" + description);

    // We use the identifier to ensure each object ends up in the
    // database only once no matter how many times we run the
    // indexer.
    string idterm = "Q" + identifier;
    doc.add_boolean_term(idterm);
    db.replace_document(idterm, doc);
    }
}
Beispiel #3
0
int main(int argc, char **argv)
{
    if(argc < 2) {
        usage(argv);
        return 1;
    }

    try {
        char *action = argv[1];
        char *db_path = argv[2];

        if(!strcmp(action, "index")) {
            Xapian::WritableDatabase db(db_path, Xapian::DB_CREATE_OR_OPEN);

            Xapian::TermGenerator indexer;
            Xapian::Stem stemmer("english");
            indexer.set_stemmer(stemmer);

            std::string doc_txt;
            while(true) {
                if(std::cin.eof()) break;

                std::string line;
                getline(std::cin, line);
                doc_txt += line;
            }

            if(!doc_txt.empty()) {
                Xapian::Document doc;
                doc.set_data(doc_txt);

                indexer.set_document(doc);
                indexer.index_text(doc_txt);

                db.add_document(doc);

                std::cout << "Indexed: " << indexer.get_description() << std::endl;
            }

            db.commit();
        } else if(!strcmp(action, "search")) {
            if(argc < 4) {
                std::cerr << "You must supply a query string" << std::endl;
                return 1;
            }

            Xapian::Database db(db_path);
            Xapian::Enquire enquire(db);

            std::string query_str = argv[3];
            argv+= 4;
            while(*argv) {
                query_str += ' ';
                query_str += *argv++;
            }

            Xapian::QueryParser qp;
            Xapian::Stem stemmer("english");
            qp.set_stemmer(stemmer);
            qp.set_database(db);
            qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);

            Xapian::Query query = qp.parse_query(query_str);
            std::cout << "Parsed query is: " << query.get_description() <<
                         std::endl;

            enquire.set_query(query);
            Xapian::MSet matches = enquire.get_mset(0, 10);

            std::cout << matches.get_matches_estimated() << " results found.\n";
            std::cout << "Matches 1-" << matches.size() << ":\n" << std::endl;

            for (Xapian::MSetIterator i = matches.begin();
                    i != matches.end(); ++i) {
                std::cout << i.get_rank() + 1 << ": " << i.get_percent() <<
                        "% docid=" << *i << " [" <<
                        i.get_document().get_data()<< "]" << std::endl <<
                        std::endl;
            }
        } else {
            std::cerr << "Invalid action " << action << std::endl;
            usage(argv);
            return 1;
        }

    } catch (const Xapian::Error &error) {
        std::cout << "Exception: " << error.get_msg() << std::endl;
    }
}