Example #1
0
/**
 * Runs one similarity query per input line against an existing database.
 *
 * The database named by opt.name is opened and its character size is
 * compared with sizeof(char_type).  Each line read from `is` is then passed
 * to db.retrieve() together with opt.measure and opt.threshold, and the
 * retrieved strings are collected.
 *
 * Output written to `os`:
 *   - unless opt.benchmark is set: the query itself (if opt.echo_back) and
 *     every retrieved string prefixed by a tab;
 *   - unless opt.quiet is set: a per-query summary line;
 *   - if opt.benchmark is set: aggregate statistics after the last query.
 * Error messages go to std::cerr.
 *
 * @param opt   Options; the fields read here are name, measure, threshold,
 *              benchmark, echo_back and quiet.
 * @param is    Stream supplying one query string per line.
 * @param os    Stream receiving results and statistics.
 * @return 0 on success; 1 if the database cannot be opened or its character
 *         size differs from sizeof(char_type).
 */
int retrieve(option& opt, istream_type& is, ostream_type& os)
{
    typedef std::basic_string<char_type> string_type;
    typedef std::vector<string_type> strings_type;
    typedef simstring::reader reader_type;

    std::ostream& es = std::cerr;

    // Open the database.
    reader_type db;
    if (!db.open(opt.name)) {
        es << "ERROR: " << db.error() << std::endl;
        return 1;
    }

    // Check the size of characters.
    if (db.char_size() != sizeof(char_type)) {
        es << "ERROR: Inconsistent character encoding " <<
            "(DB:" << db.char_size() << ", " <<
            "CUR:" << sizeof(char_type) << "): " << std::endl;
        es << "This problem may be solved by specifying -u (--unicode) option." << std::endl;
        return 1;
    }

    int num_queries = 0;
    int num_retrieved = 0;
    clock_t clk_total = 0;

    // Loop on the result of getline itself rather than on eof(): eofbit is
    // also set when the final line lacks a trailing newline (that line must
    // still be processed), and a stream that fails without reaching EOF
    // would otherwise never terminate the loop.
    string_type line;
    while (std::getline(is, line)) {
        // Issue a query; only the retrieval itself is timed.
        strings_type xstrs;
        const clock_t clk = std::clock();
        db.retrieve(line, opt.measure, opt.threshold, std::back_inserter(xstrs));
        const clock_t elapsed = std::clock() - clk;

        // Update stats.
        clk_total += elapsed;
        num_retrieved += static_cast<int>(xstrs.size());
        ++num_queries;

        // Do not output results when the benchmarking flag is on.
        if (!opt.benchmark) {
            // Output the query string if necessary.
            if (opt.echo_back) {
                os << line << std::endl;
            }

            // Output the retrieved strings, one per line, tab-indented.
            typename strings_type::const_iterator it;
            for (it = xstrs.begin(); it != xstrs.end(); ++it) {
                os << os.widen('\t') << *it << std::endl;
            }
            os.flush();
        }

        // Do not output information when the quiet flag is on.
        if (!opt.quiet) {
            // Report the measured retrieval time (elapsed), not a fresh clock
            // reading, so the time spent printing results above is excluded
            // and the figure agrees with the benchmark total.
            os <<
                xstrs.size() <<
                widen<char_type>(" strings retrieved (") <<
                elapsed / static_cast<double>(CLOCKS_PER_SEC) <<
                widen<char_type>(" sec)") << std::endl;
        }
    }

    // Output the benchmark information if necessary.
    if (opt.benchmark) {
        // With no queries the averages are undefined; report 0 instead of
        // dividing by zero and printing NaN/inf.
        double sec_per_query = 0.0;
        double retrieved_per_query = 0.0;
        if (num_queries > 0) {
            sec_per_query =
                clk_total / static_cast<double>(CLOCKS_PER_SEC) / num_queries;
            retrieved_per_query =
                num_retrieved / static_cast<double>(num_queries);
        }

        os <<
            widen<char_type>("Total number of queries: ") <<
            num_queries << std::endl;
        os <<
            widen<char_type>("Seconds per query: ") <<
            sec_per_query << std::endl;
        os <<
            widen<char_type>("Number of retrieved strings per query: ") <<
            retrieved_per_query << std::endl;
    }

    return 0;
}
Example #2
0
/**
 * Builds a database from the strings read line-by-line from `is`.
 *
 * Prints the version banner and the construction parameters to std::cout,
 * creates a writer for opt.name using an n-gram generator configured with
 * opt.ngram_size and opt.be, inserts every input line, closes the database,
 * and finally reports the number of strings and the CPU time measured with
 * std::clock().  Error messages go to std::cerr.
 *
 * @param opt   Options; the fields read here are name, ngram_size, be and
 *              quiet (quiet suppresses only the periodic progress lines).
 * @param is    Stream supplying one string per line.
 * @return 0 on success; 1 if the writer reports failure on creation, on an
 *         insertion, or on close.
 */
int build(option& opt, istream_type& is)
{
    typedef std::basic_string<char_type> string_type;
    typedef simstring::ngram_generator ngram_generator_type;
    typedef simstring::writer_base<string_type, ngram_generator_type> writer_type;

    std::ostream& os = std::cout;
    std::ostream& es = std::cerr;

    // Show the copyright information.
    version(os);

    // Show parameters for database construction.
    os << "Constructing the database" << std::endl;
    os << "Database name: " << opt.name << std::endl;
    os << "N-gram length: " << opt.ngram_size << std::endl;
    os << "Begin/end marks: " << std::boolalpha << opt.be << std::endl;
    os << "Char type: " << typeid(char_type).name() << " (" << sizeof(char_type) << ")" << std::endl;
    os.flush();

    // Open the database for construction.
    const clock_t clk = std::clock();
    ngram_generator_type gen(opt.ngram_size, opt.be);
    writer_type db(gen, opt.name);
    if (db.fail()) {
        es << "ERROR: " << db.error() << std::endl;
        return 1;
    }

    // Insert every string from the input stream into the database.
    // Loop on the result of getline itself rather than on eof(): eofbit is
    // also set when the final line lacks a trailing newline (that line must
    // still be inserted), and a stream that fails without reaching EOF
    // would otherwise never terminate the loop.
    int n = 0;
    string_type line;
    while (std::getline(is, line)) {
        // Insert the string.
        if (!db.insert(line)) {
            es << "ERROR: " << db.error() << std::endl;
            return 1;
        }

        // Count unconditionally: the counter must not depend on the quiet
        // flag, otherwise the totals printed below are always 0 in quiet mode.
        ++n;

        // Progress report.
        if (!opt.quiet && n % 10000 == 0) {
            os << "Number of strings: " << n << std::endl;
            os.flush();
        }
    }
    os << "Number of strings: " << n << std::endl;
    os << std::endl;
    os.flush();

    // Finalize the database.
    os << "Flushing the database" << std::endl;
    if (!db.close()) {
        es << "ERROR: " << db.error() << std::endl;
        return 1;
    }
    os << std::endl;

    // Report the elapsed time for construction.
    os << "Total number of strings: " << n << std::endl;
    os << "Seconds required: "
        << (std::clock() - clk) / static_cast<double>(CLOCKS_PER_SEC) << std::endl;
    os << std::endl;
    os.flush();

    return 0;
}