int retrieve(option& opt, istream_type& is, ostream_type& os) { typedef std::basic_string<char_type> string_type; typedef std::vector<string_type> strings_type; typedef simstring::reader reader_type; std::ostream& es = std::cerr; // Open the database. reader_type db; if (!db.open(opt.name)) { es << "ERROR: " << db.error() << std::endl; return 1; } // Check the size of characters. if (db.char_size() != sizeof(char_type)) { es << "ERROR: Inconsistent character encoding " << "(DB:" << db.char_size() << ", " << "CUR:" << sizeof(char_type) << "): " << std::endl; es << "This problem may be solved by specifying -u (--unicode) option." << std::endl; return 1; } int num_queries = 0; int num_retrieved = 0; clock_t clk_total = 0; for (;;) { // Read a line. string_type line; std::getline(is, line); if (is.eof()) { break; } // Issue a query. strings_type xstrs; clock_t clk = std::clock(); db.retrieve(line, opt.measure, opt.threshold, std::back_inserter(xstrs)); clock_t elapsed = (std::clock() - clk); // Update stats. clk_total += elapsed; num_retrieved += (int)xstrs.size(); ++num_queries; // Do not output results when the benchmarking flag is on. if (!opt.benchmark) { // Output the query string if necessary. if (opt.echo_back) { os << line << std::endl; } // Output the retrieved strings. typename strings_type::const_iterator it; for (it = xstrs.begin();it != xstrs.end();++it) { os << os.widen('\t') << *it << std::endl; } os.flush(); } // Do not output information when the quiet flag is on. if (!opt.quiet) { os << xstrs.size() << widen<char_type>(" strings retrieved (") << (std::clock() - clk) / (double)CLOCKS_PER_SEC << widen<char_type>(" sec)") << std::endl; } } // Output the benchmark information if necessary. 
if (opt.benchmark) { os << widen<char_type>("Total number of queries: ") << num_queries << std::endl; os << widen<char_type>("Seconds per query: ") << clk_total / (double)CLOCKS_PER_SEC / num_queries << std::endl; os << widen<char_type>("Number of retrieved strings per query: ") << num_retrieved / (double)num_queries << std::endl; } return 0; }
/**
 * Builds a database from the strings read line by line from an input
 * stream, reporting parameters and progress on std::cout and errors on
 * std::cerr.
 *
 * NOTE(review): char_type and istream_type are not declared in this
 * definition; presumably they are template parameters declared just
 * before it -- confirm against the full file.
 *
 *  @param  opt     Options read here: name (database to create),
 *                  ngram_size and be (passed to the n-gram generator),
 *                  and quiet (suppresses the periodic progress report).
 *  @param  is      Input stream supplying one string per line.
 *  @return         0 on success; 1 if the writer cannot be opened, an
 *                  insertion fails, or closing the database fails.
 */
int build(option& opt, istream_type& is)
{
    typedef std::basic_string<char_type> string_type;
    typedef simstring::ngram_generator ngram_generator_type;
    typedef simstring::writer_base<string_type, ngram_generator_type> writer_type;

    std::ostream& os = std::cout;
    std::ostream& es = std::cerr;

    // Show the copyright information.
    version(os);

    // Show parameters for database construction.
    os << "Constructing the database" << std::endl;
    os << "Database name: " << opt.name << std::endl;
    os << "N-gram length: " << opt.ngram_size << std::endl;
    os << "Begin/end marks: " << std::boolalpha << opt.be << std::endl;
    os << "Char type: " << typeid(char_type).name()
       << " (" << sizeof(char_type) << ")" << std::endl;
    os.flush();

    // Open the database for construction.
    const clock_t clk = std::clock();
    ngram_generator_type gen(opt.ngram_size, opt.be);
    writer_type db(gen, opt.name);
    if (db.fail()) {
        es << "ERROR: " << db.error() << std::endl;
        return 1;
    }

    // Insert every string from the input stream into the database.
    // Test the result of getline() itself rather than eof(): getline() sets
    // eofbit while still returning a final line that lacks a trailing
    // newline, so an eof() check would drop that last string. Testing the
    // stream state also ends the loop on a non-EOF read failure.
    int n = 0;
    string_type line;
    while (std::getline(is, line)) {
        // Insert the string.
        if (!db.insert(line)) {
            es << "ERROR: " << db.error() << std::endl;
            return 1;
        }

        // Count every inserted string regardless of the quiet flag, so the
        // totals printed below stay correct when progress output is off.
        ++n;

        // Progress report.
        if (!opt.quiet && n % 10000 == 0) {
            os << "Number of strings: " << n << std::endl;
            os.flush();
        }
    }
    os << "Number of strings: " << n << std::endl;
    os << std::endl;
    os.flush();

    // Finalize the database.
    os << "Flushing the database" << std::endl;
    if (!db.close()) {
        es << "ERROR: " << db.error() << std::endl;
        return 1;
    }
    os << std::endl;

    // Report the elapsed time for construction.
    os << "Total number of strings: " << n << std::endl;
    os << "Seconds required: "
       << (std::clock() - clk) / static_cast<double>(CLOCKS_PER_SEC) << std::endl;
    os << std::endl;
    os.flush();

    return 0;
}