static int run(int argc, char **argv) { static const MeCab::Option long_options[] = { { "output", 'o', 0, "FILE", "set the output filename" }, { "version", 'v', 0, 0, "show the version and exit" }, { "help", 'h', 0, 0, "show this help and exit." }, { 0, 0, 0, 0 } }; MeCab::Param param; param.open(argc, argv, long_options); if (!param.open(argc, argv, long_options)) { std::cout << param.what() << "\n\n" << COPYRIGHT << "\ntry '--help' for more information." << std::endl; return -1; } if (!param.help_version()) { return 0; } const std::vector<std::string> &tmp = param.rest_args(); std::vector<std::string> files = tmp; if (files.empty()) { files.push_back("-"); } std::string output = param.get<std::string>("output"); if (output.empty()) output = "-"; MeCab::ostream_wrapper ofs(output.c_str()); CHECK_DIE(*ofs) << "permission denied: " << output; scoped_fixed_array<char, BUF_SIZE> buf; char *col[2]; std::string str; for (size_t i = 0; i < files.size(); ++i) { MeCab::istream_wrapper ifs(files[i].c_str()); CHECK_DIE(*ifs) << "no such file or directory: " << files[i]; while (ifs->getline(buf.get(), buf.size())) { const size_t n = tokenize(buf.get(), "\t ", col, 2); CHECK_DIE(n <= 2) << "format error: " << buf.get(); if (std::strcmp(col[0], "EOS") == 0 && !str.empty()) { *ofs << str << std::endl; str.clear(); } else { str += col[0]; } } } return 0; }
int mecab_do(int argc, char **argv) { //debug std::cout << "[" << __LINE__ << "]: " << "mecab_do(int argc, char **argv)" << __FILE__ << std::endl; /// #define WHAT_ERROR(msg) do { \ std::cout << msg << std::endl; \ return EXIT_FAILURE; } \ while (0); // //debug // std::cout << "[" << __LINE__ << "]: " << __FILE__ << std::endl; // WHAT_ERROR("ABCDE"); // /// MeCab::Param param; if (!param.open(argc, argv, MeCab::long_options)) { std::cout << param.what() << std::endl; return EXIT_FAILURE; } if (param.get<bool>("help")) { std::cout << param.help() << std::endl; return EXIT_SUCCESS; } if (param.get<bool>("version")) { std::cout << param.version() << std::endl; return EXIT_SUCCESS; } if (!load_dictionary_resource(¶m)) { std::cout << param.what() << std::endl; return EXIT_SUCCESS; } if (param.get<int>("lattice-level") >= 1) { std::cerr << "lattice-level is DEPERCATED. " << "use --marginal or --nbest." << std::endl; } //debug std::cout << "[" << __LINE__ << "]: " << __FILE__ << std::endl; int dbg = param.get<int>("lattice-level"); std::cout << " param.get<int>(\"lattice-level\") => " << dbg << std::endl; /// MeCab::scoped_ptr<MeCab::ModelImpl> model(new MeCab::ModelImpl); if (!model->open(param)) { std::cout << MeCab::getLastError() << std::endl; return EXIT_FAILURE; } std::string ofilename = param.get<std::string>("output"); if (ofilename.empty()) { ofilename = "-"; } const int nbest = param.get<int>("nbest"); if (nbest <= 0 || nbest > NBEST_MAX) { WHAT_ERROR("invalid N value"); } MeCab::ostream_wrapper ofs(ofilename.c_str()); if (!*ofs) { WHAT_ERROR("no such file or directory: " << ofilename); } if (param.get<bool>("dump-config")) { param.dump_config(&*ofs); return EXIT_FAILURE; } if (param.get<bool>("dictionary-info")) { for (const MeCab::DictionaryInfo *d = model->dictionary_info(); d; d = d->next) { *ofs << "filename:\t" << d->filename << std::endl; *ofs << "version:\t" << d->version << std::endl; *ofs << "charset:\t" << d->charset << std::endl; *ofs << "type:\t" << d->type << std::endl; *ofs << "size:\t" << d->size << std::endl; *ofs << "left size:\t" << d->lsize << std::endl; *ofs << "right size:\t" << d->rsize << std::endl; *ofs << std::endl; } return EXIT_FAILURE; } const std::vector<std::string>& rest_ = param.rest_args(); std::vector<std::string> rest = rest_; if (rest.empty()) { rest.push_back("-"); } size_t ibufsize = std::min(MAX_INPUT_BUFFER_SIZE, std::max(param.get<int> ("input-buffer-size"), MIN_INPUT_BUFFER_SIZE)); const bool partial = param.get<bool>("partial"); if (partial) { ibufsize *= 8; } MeCab::scoped_array<char> ibuf_data(new char[ibufsize]); char *ibuf = ibuf_data.get(); MeCab::scoped_ptr<MeCab::Tagger> tagger(model->createTagger()); if (!tagger.get()) { WHAT_ERROR("cannot create tagger"); } for (size_t i = 0; i < rest.size(); ++i) { MeCab::istream_wrapper ifs(rest[i].c_str()); if (!*ifs) { WHAT_ERROR("no such file or directory: " << rest[i]); } while (true) { if (!partial) { ifs->getline(ibuf, ibufsize); } else { std::string sentence; MeCab::scoped_fixed_array<char, BUF_SIZE> line; for (;;) { if (!ifs->getline(line.get(), line.size())) { ifs->clear(std::ios::eofbit|std::ios::badbit); break; } sentence += line.get(); sentence += '\n'; if (std::strcmp(line.get(), "EOS") == 0 || line[0] == '\0') { break; } } std::strncpy(ibuf, sentence.c_str(), ibufsize); } if (ifs->eof() && !ibuf[0]) { return false; } if (ifs->fail()) { std::cerr << "input-buffer overflow. " << "The line is split. use -b #SIZE option." << std::endl; ifs->clear(); } const char *r = (nbest >= 2) ? tagger->parseNBest(nbest, ibuf) : tagger->parse(ibuf); if (!r) { WHAT_ERROR(tagger->what()); } *ofs << r << std::flush; } } return EXIT_SUCCESS; #undef WHAT_ERROR }
static bool eval(int argc, char **argv) { static const MeCab::Option long_options[] = { { "level", 'l', "0 -1", "STR", "set level of evaluations" }, { "output", 'o', 0, "FILE", "set the output file name" }, { "version", 'v', 0, 0, "show the version and exit" }, { "help", 'h', 0, 0, "show this help and exit." }, { 0, 0, 0, 0 } }; MeCab::Param param; param.open(argc, argv, long_options); if (!param.open(argc, argv, long_options)) { std::cout << param.what() << "\n\n" << COPYRIGHT << "\ntry '--help' for more information." << std::endl; return -1; } if (!param.help_version()) return 0; const std::vector<std::string> &files = param.rest_args(); if (files.size() < 2) { std::cout << "Usage: " << param.program_name() << " output answer" << std::endl; return -1; } std::string output = param.get<std::string>("output"); if (output.empty()) output = "-"; MeCab::ostream_wrapper ofs(output.c_str()); CHECK_DIE(*ofs) << "no such file or directory: " << output; const std::string system = files[0]; const std::string answer = files[1]; const std::string level_str = param.get<std::string>("level"); std::ifstream ifs1(files[0].c_str()); std::ifstream ifs2(files[1].c_str()); CHECK_DIE(ifs1) << "no such file or directory: " << files[0].c_str(); CHECK_DIE(ifs2) << "no such file or directory: " << files[0].c_str(); CHECK_DIE(!level_str.empty()) << "level_str is NULL"; std::vector<int> level; parseLevel(level_str.c_str(), &level); CHECK_DIE(level.size()) << "level_str is empty: " << level_str; std::vector<size_t> result_tbl(level.size()); std::fill(result_tbl.begin(), result_tbl.end(), 0); size_t prec = 0; size_t recall = 0; std::vector<std::vector<std::string> > r1; std::vector<std::vector<std::string> > r2; while (true) { if (!read(&ifs1, &r1, level) || !read(&ifs2, &r2, level)) break; size_t i1 = 0; size_t i2 = 0; size_t p1 = 0; size_t p2 = 0; while (i1 < r1.size() && i2 < r2.size()) { if (p1 == p2) { for (size_t i = 0; i < result_tbl.size(); ++i) { if (r1[i1][i] == r2[i2][i]) { result_tbl[i]++; } } p1 += r1[i1][0].size(); p2 += r2[i2][0].size(); ++i1; ++i2; ++prec; ++recall; } else if (p1 < p2) { p1 += r1[i1][0].size(); ++i1; ++prec; } else { p2 += r2[i2][0].size(); ++i2; ++recall; } } while (i1 < r1.size()) { ++prec; ++i1; } while (i2 < r2.size()) { ++recall; ++i2; } } *ofs << " precision recall F" << std::endl; for (size_t i = 0; i < result_tbl.size(); ++i) { if (level[i] == -1) { *ofs << "LEVEL ALL: "; } else { *ofs << "LEVEL " << level[i] << ": "; } printeval(&*ofs, result_tbl[i], prec, recall); } return true; }