Пример #1
0
  /**
   * Command-line entry point: rebuilds one text line per sentence from
   * tokenized input.
   *
   * Every input line is split on tab/space and its first token is appended
   * to the current sentence. A line whose first token is "EOS" ends the
   * sentence: a non-empty sentence is written to the output followed by a
   * newline, and the buffer is reset.
   *
   * Inputs are the positional arguments ("-" is used when there are none);
   * the destination is --output ("-" when unset). How "-" is interpreted is
   * up to istream_wrapper / ostream_wrapper (presumably stdin / stdout).
   *
   * @param argc  argument count, as received by main().
   * @param argv  argument vector, as received by main().
   * @return -1 if the command line could not be parsed; 0 otherwise
   *         (including when help_version() reports that nothing more is
   *         to be done).
   */
  static int run(int argc, char **argv) {
    static const MeCab::Option long_options[] = {
      { "output",   'o',  0,   "FILE", "set the output filename" },
      { "version",  'v',  0,   0,    "show the version and exit"   },
      { "help",  'h',  0,   0,    "show this help and exit."   },
      { 0, 0, 0, 0 }
    };

    MeCab::Param param;
    // Parse the command line exactly once. The previous code called open()
    // twice (one unchecked call followed by the checked one), which repeats
    // the parsing work and any state open() accumulates.
    if (!param.open(argc, argv, long_options)) {
      std::cout << param.what() << "\n\n" <<  COPYRIGHT
                << "\ntry '--help' for more information." << std::endl;
      return -1;
    }

    if (!param.help_version()) {
      return 0;
    }

    // Work on a private copy so a default input can be appended.
    std::vector<std::string> files = param.rest_args();
    if (files.empty()) {
      files.push_back("-");
    }

    std::string output = param.get<std::string>("output");
    if (output.empty()) output = "-";
    MeCab::ostream_wrapper ofs(output.c_str());
    CHECK_DIE(*ofs) << "permission denied: " << output;

    scoped_fixed_array<char, BUF_SIZE> buf;
    char *col[2] = { 0, 0 };
    std::string str;  // sentence under construction; persists across files
    for (size_t i = 0; i < files.size(); ++i) {
      MeCab::istream_wrapper ifs(files[i].c_str());
      CHECK_DIE(*ifs) << "no such file or directory: " << files[i];
      while (ifs->getline(buf.get(), buf.size())) {
        const size_t n = tokenize(buf.get(), "\t ", col, 2);
        CHECK_DIE(n <= 2) << "format error: " << buf.get();
        // Defensive: never inspect col[0] unless a token was produced.
        if (n == 0) continue;

        if (std::strcmp(col[0], "EOS") == 0) {
          // Sentence boundary. Only non-empty sentences are emitted, and the
          // marker itself must never leak into the text (previously an "EOS"
          // seen with an empty buffer was appended to the next sentence).
          if (!str.empty()) {
            *ofs << str << std::endl;
            str.clear();
          }
        } else {
          str += col[0];
        }
      }
    }

    return 0;
  }
Пример #2
0
  /**
   * Command-line entry point: compares a system result file with an answer
   * file and prints a precision / recall / F table, one row per evaluation
   * level.
   *
   * The first two positional arguments are the system output and the answer
   * file. --level (default "0 -1") is parsed by parseLevel() into the list of
   * levels to score; a level of -1 is reported as "LEVEL ALL". The table is
   * written to --output ("-" when unset).
   *
   * Both files are consumed sentence by sentence through read(). Within a
   * sentence the two token sequences are aligned by the running sum of the
   * lengths of column 0: tokens that start at the same offset are compared
   * column by column, otherwise the side that is behind is advanced.
   *
   * @param argc  argument count, as received by main().
   * @param argv  argument vector, as received by main().
   * @return true when the evaluation ran to completion; false when the
   *         command line could not be parsed, fewer than two files were
   *         given, or help_version() returned false.
   */
  static bool eval(int argc, char **argv) {
    static const MeCab::Option long_options[] = {
      { "level",  'l',  "0 -1",    "STR",    "set level of evaluations" },
      { "output", 'o',  0,         "FILE",   "set the output file name" },
      { "version",  'v',  0,   0,    "show the version and exit"   },
      { "help",  'h',  0,   0,    "show this help and exit."   },
      { 0, 0, 0, 0 }
    };

    MeCab::Param param;
    // Parse the command line exactly once (it used to be parsed twice: an
    // unchecked open() followed by the checked one).
    if (!param.open(argc, argv, long_options)) {
      std::cout << param.what() << "\n\n" <<  COPYRIGHT
                << "\ntry '--help' for more information." << std::endl;
      // This function returns bool: the former `return -1` converted to
      // `true`, making a parse failure indistinguishable from success.
      return false;
    }

    // Same value as the original `return 0`.
    // NOTE(review): callers cannot tell this apart from a failure - confirm
    // whether the help/version path should report true instead.
    if (!param.help_version()) return false;

    const std::vector<std::string> &files = param.rest_args();
    if (files.size() < 2) {
      std::cout << "Usage: " <<
          param.program_name() << " output answer" << std::endl;
      return false;  // was `return -1`, i.e. true
    }

    std::string output = param.get<std::string>("output");
    if (output.empty()) output = "-";
    MeCab::ostream_wrapper ofs(output.c_str());
    CHECK_DIE(*ofs) << "no such file or directory: " << output;

    const std::string &system_file = files[0];
    const std::string &answer_file = files[1];

    const std::string level_str = param.get<std::string>("level");

    std::ifstream ifs1(system_file.c_str());
    std::ifstream ifs2(answer_file.c_str());

    CHECK_DIE(ifs1) << "no such file or directory: " << system_file;
    // Report the file that actually failed (the message used files[0]).
    CHECK_DIE(ifs2) << "no such file or directory: " << answer_file;
    CHECK_DIE(!level_str.empty()) << "level_str is NULL";

    std::vector<int> level;
    parseLevel(level_str.c_str(), &level);
    CHECK_DIE(level.size()) << "level_str is empty: " << level_str;

    // Per-level count of matching tokens; value-initialized to zero.
    std::vector<size_t> result_tbl(level.size());

    size_t prec = 0;    // number of tokens seen in the system file
    size_t recall = 0;  // number of tokens seen in the answer file

    std::vector<std::vector<std::string> > r1;
    std::vector<std::vector<std::string> > r2;

    while (true) {
      // Stop as soon as either file runs out of sentences.
      if (!read(&ifs1, &r1, level) || !read(&ifs2, &r2, level))
        break;

      size_t i1 = 0;  // token index in r1
      size_t i2 = 0;  // token index in r2
      size_t p1 = 0;  // offset of r1[i1]: summed column-0 lengths before it
      size_t p2 = 0;  // offset of r2[i2]: summed column-0 lengths before it

      while (i1 < r1.size() && i2 < r2.size()) {
        if (p1 == p2) {
          // Both tokens start at the same offset: compare column by column.
          // NOTE(review): assumes read() yields at least level.size()
          // columns per token - confirm.
          for (size_t i = 0; i < result_tbl.size(); ++i) {
            if (r1[i1][i] == r2[i2][i]) {
              result_tbl[i]++;
            }
          }
          p1 += r1[i1][0].size();
          p2 += r2[i2][0].size();
          ++i1;
          ++i2;
          ++prec;
          ++recall;
        } else if (p1 < p2) {
          // System side is behind: consume one system token.
          p1 += r1[i1][0].size();
          ++i1;
          ++prec;
        } else {
          // Answer side is behind: consume one answer token.
          p2 += r2[i2][0].size();
          ++i2;
          ++recall;
        }
      }

      // Whatever is left on either side still counts toward its total.
      prec += r1.size() - i1;
      recall += r2.size() - i2;
    }

    *ofs <<  "              precision          recall         F"
         << std::endl;
    for (size_t i = 0; i < result_tbl.size(); ++i) {
      if (level[i] == -1) {
        *ofs << "LEVEL ALL: ";
      } else {
        *ofs << "LEVEL " << level[i] << ":    ";
      }
      printeval(&*ofs, result_tbl[i], prec, recall);
    }

    return true;
  }