Ejemplo n.º 1
0
// Configures this parser from `param` and assembles the analyzer pipeline
// that matches the requested input/output layers.
//
// param: parameter store; it is read (rcfile, layers, format, mode, ne,
//        posset) and also updated in place by load() and REPLACE_PROFILE.
//
// Returns false when the rc file cannot be loaded. Returns true when the end
// of the function is reached.
// NOTE(review): CHECK_FALSE and PUSH_ANALYZER are macros defined elsewhere;
// they presumably return false on failure as well -- confirm against their
// definitions.
bool ParserImpl::open(Param *param) {
  // Start from a closed state before (re)configuring.
  close();

  // Use the rc file named in the parameters, or the default one if none given.
  std::string rcfile = param->get<std::string>("rcfile");
  if (rcfile.empty()) {
    rcfile = get_default_rc();
  }

  // Loading failed: record the parameter store's message, close, and bail out.
  if (!param->load(rcfile.c_str())) {
    WHAT << param->what();
    close();
    return false;
  }

  // rcpath starts as a copy of rcfile and is then passed to remove_filename()
  // (presumably leaving only the directory part -- TODO confirm).
  std::string rcpath = rcfile;
  remove_filename(&rcpath);

  // Post-process the model / chasenrc settings using rcpath.
  // NOTE(review): presumably this substitutes an rc-path placeholder inside
  // each value; the macro body is not visible here.
  REPLACE_PROFILE(param, rcpath, "parser-model");
  REPLACE_PROFILE(param, rcpath, "chunker-model");
  REPLACE_PROFILE(param, rcpath, "ne-model");
  REPLACE_PROFILE(param, rcpath, "chasenrc");

  // NOTE(review): second close() with no state change since the first one
  // above -- looks redundant, kept as-is.
  close();

  // The layers and the output format are stored as ints in the parameters and
  // cast to their enum types here.
  input_layer_   =
      static_cast<InputLayerType>(param->get<int>("input-layer"));
  output_layer_  =
      static_cast<OutputLayerType>(param->get<int>("output-layer"));
  output_format_ =
      static_cast<FormatType>(param->get<int>("output-format"));

  const int action = param->get<int>("action-mode");
  const int ne     = param->get<int>("ne");
  // Training mode suppresses formatted output.
  if (action == TRAINING_MODE) {
    output_format_ = FORMAT_NONE;
  }

  // Any output layer other than full dependency output is forced to the
  // lattice format. Because this runs after the training-mode check, it also
  // overrides FORMAT_NONE whenever the output layer is not OUTPUT_DEP.
  if (output_layer_ != OUTPUT_DEP) {
    output_format_ = FORMAT_LATTICE;
  }

  charset_ = get_charset(*param, rcpath);
  posset_ = decode_posset(param->get<std::string>("posset").c_str());

  // -1 is treated as the "invalid" value for both settings.
  CHECK_FALSE(charset_ != -1);
  CHECK_FALSE(posset_ != -1);

  // Build the pipeline. The stages are always pushed in the order
  //   MorphAnalyzer -> NE (only if `ne` is non-zero) -> Chunker -> Selector
  //   -> DependencyParser
  // The input layer decides which leading stages are skipped and the output
  // layer decides where the pipeline stops. Unknown layer values add nothing.
  switch (input_layer_) {
    case INPUT_RAW_SENTENCE:  // case 1
      {
        switch (output_layer_) {
          case OUTPUT_POS:
            PUSH_ANALYZER(MorphAnalyzer);
            if (ne) PUSH_ANALYZER(NE);
            break;
          case OUTPUT_CHUNK:
            PUSH_ANALYZER(MorphAnalyzer);
            if (ne) PUSH_ANALYZER(NE);
            PUSH_ANALYZER(Chunker);
            break;
          case OUTPUT_SELECTION:
            PUSH_ANALYZER(MorphAnalyzer);
            if (ne) PUSH_ANALYZER(NE);
            PUSH_ANALYZER(Chunker);
            PUSH_ANALYZER(Selector);
            break;
          case OUTPUT_DEP:
            PUSH_ANALYZER(MorphAnalyzer);
            if (ne) PUSH_ANALYZER(NE);
            PUSH_ANALYZER(Chunker);
            PUSH_ANALYZER(Selector);
            PUSH_ANALYZER(DependencyParser);
            break;
          default:
            break;
        }
        break;
      }

    case INPUT_POS:  // case 2
      {
        // With POS input the NE stage is pushed (when enabled) before the
        // output layer is examined, so it is added for every output layer.
        if (ne) PUSH_ANALYZER(NE);
        switch (output_layer_) {
          case OUTPUT_POS:
            break;
          case OUTPUT_CHUNK:
            PUSH_ANALYZER(Chunker);
            break;
          case OUTPUT_SELECTION:
            PUSH_ANALYZER(Chunker);
            PUSH_ANALYZER(Selector);
            break;
          case OUTPUT_DEP:
            PUSH_ANALYZER(Chunker);
            PUSH_ANALYZER(Selector);
            PUSH_ANALYZER(DependencyParser);
            break;
          default:
            break;
        }
        break;
      }

    case INPUT_CHUNK:  // case 3
      {
        switch (output_layer_) {
          // Output layers at or below the input layer need no analyzers.
          case OUTPUT_POS:
          case OUTPUT_CHUNK:
            break;
          case OUTPUT_SELECTION:
            PUSH_ANALYZER(Selector);
            break;
          case OUTPUT_DEP:
            PUSH_ANALYZER(Selector);
            PUSH_ANALYZER(DependencyParser);
            break;
          default:
            break;
        }
        break;
      }

    case  INPUT_SELECTION:  // case 4
      {
        switch (output_layer_) {
          // Only the dependency parser is left to run for this input layer.
          case OUTPUT_POS:
          case OUTPUT_CHUNK:
          case OUTPUT_SELECTION:
            break;
          case OUTPUT_DEP:
            PUSH_ANALYZER(DependencyParser);
            break;
          default:
            break;
        }
        break;
      }

    default:
      break;
  }

  return true;
}
#if defined(_WIN32) && !defined(__CYGWIN__)
// Reads the "mecabrc" string value stored under <root>\software\mecab.
//
// root:   registry hive to search (e.g. HKEY_LOCAL_MACHINE).
// rcfile: receives the value; left untouched on any failure.
//
// Returns true only when the key opens, the query succeeds and the value has
// type REG_SZ.
static bool read_mecabrc_from_registry(HKEY root, std::string *rcfile) {
  HKEY hKey;
  if (RegOpenKeyEx(root, "software\\mecab", 0, KEY_READ, &hKey) !=
      ERROR_SUCCESS) {
    return false;  // hKey is not valid; nothing to query or close.
  }

  char v[BUF_SIZE];
  DWORD vt = REG_NONE;
  // `size` is an in/out parameter, so it is re-initialised for every query.
  // One byte is held back because REG_SZ data is not guaranteed to be
  // NUL-terminated.
  DWORD size = sizeof(v) - 1;
  const LONG status = RegQueryValueEx(hKey, "mecabrc", 0, &vt,
                                      reinterpret_cast<BYTE *>(v), &size);
  RegCloseKey(hKey);
  if (status != ERROR_SUCCESS || vt != REG_SZ) {
    return false;
  }

  v[size] = '\0';  // size <= sizeof(v) - 1, so this stays in bounds.
  *rcfile = v;
  return true;
}
#endif

// Locates the rc file path and loads the dictionary resource file (DICRC)
// from the configured dictionary directory into `param`.
//
// The rc file path is taken from the first non-empty source, in this order:
//   1. the "rcfile" parameter,
//   2. $HOME/.mecabrc, if that file can be opened (HAVE_GETENV),
//   3. the MECABRC environment variable (HAVE_GETENV),
//   4. on native Windows: GetEnvironmentVariable("MECABRC"), then the
//      "mecabrc" value under HKLM\software\mecab, then HKCU\software\mecab.
//
// In this variant the rc file itself is NOT loaded (that step is commented
// out "for Open JTalk"); its path is only used to expand "$(rcpath)" inside
// the "dicdir" parameter.
//
// Returns false when the DICRC file in dicdir cannot be loaded, else true.
bool load_dictionary_resource(Param *param) {
  std::string rcfile = param->get<std::string>("rcfile");

#ifdef HAVE_GETENV
  if (rcfile.empty()) {
    const char *homedir = getenv("HOME");
    if (homedir) {
      std::string s = MeCab::create_filename(std::string(homedir),
                                             ".mecabrc");
      // Only accept the per-user file if it can actually be opened.
      std::ifstream ifs(s.c_str());
      if (ifs) rcfile = s;
    }
  }

  if (rcfile.empty()) {
    const char *rcenv = getenv("MECABRC");
    if (rcenv) rcfile = rcenv;
  }
#endif

#if defined (HAVE_GETENV) && defined(_WIN32) && !defined(__CYGWIN__)
  if (rcfile.empty()) {
    char buf[BUF_SIZE];
    DWORD len = GetEnvironmentVariable("MECABRC",
                                       buf,
                                       sizeof(buf));
    // len >= sizeof(buf) means the buffer was too small; 0 means not found.
    if (len < sizeof(buf) && len > 0) {
      rcfile = buf;
    }
  }
#endif

#if defined(_WIN32) && !defined(__CYGWIN__)
  // Machine-wide setting first, then the per-user one.
  if (rcfile.empty()) {
    read_mecabrc_from_registry(HKEY_LOCAL_MACHINE, &rcfile);
  }

  if (rcfile.empty()) {
    read_mecabrc_from_registry(HKEY_CURRENT_USER, &rcfile);
  }

  /* for Open JTalk
     (disabled; if re-enabled it needs its own buffer `v` and length `vt`)
  if (rcfile.empty()) {
    vt = GetModuleFileName(DllInstance, v, size);
    if (vt != 0) {
      char drive[_MAX_DRIVE];
      char dir[_MAX_DIR];
      _splitpath(v, drive, dir, NULL, NULL);
      std::string s = std::string(drive)
          + std::string(dir) + std::string("mecabrc");
      std::ifstream ifs(s.c_str());
      if (ifs) rcfile = s;
    }
  }
  */
#endif

  /* for Open JTalk
  if (rcfile.empty()) rcfile = MECAB_DEFAULT_RC;

  if (!param->load(rcfile.c_str())) return false;
  */

  std::string dicdir = param->get<std::string>("dicdir");
  if (dicdir.empty()) dicdir = ".";  // current
  // rcfile is passed through remove_filename() and then substituted for the
  // "$(rcpath)" placeholder in dicdir.
  remove_filename(&rcfile);
  replace_string(&dicdir, "$(rcpath)", rcfile);
  param->set<std::string>("dicdir", dicdir, true);
  dicdir = create_filename(dicdir, DICRC);

  if (!param->load(dicdir.c_str())) return false;

  return true;
}
Ejemplo n.º 3
0
 // Alternate name for remove_filename(): forwards the call and hands back
 // the reference it returns.
 path&  remove_leaf()
 {
   return remove_filename();
 }
Ejemplo n.º 4
0
#if defined(_WIN32) && !defined(__CYGWIN__)
// Reads the "mecabrc" string value stored under <root>\software\mecab and
// converts it with WideToUtf8().
//
// root:   registry hive to search (e.g. HKEY_LOCAL_MACHINE).
// rcfile: receives the converted value; left untouched on any failure.
//
// Returns true only when the key opens, the query succeeds and the value has
// type REG_SZ.
static bool read_mecabrc_from_registry_w(HKEY root, std::string *rcfile) {
  HKEY hKey;
  if (::RegOpenKeyExW(root, L"software\\mecab", 0, KEY_READ, &hKey) !=
      ERROR_SUCCESS) {
    return false;  // hKey is not valid; nothing to query or close.
  }

  scoped_fixed_array<wchar_t, BUF_SIZE> v;
  DWORD vt = REG_NONE;
  // `size` is an in/out byte count, so it is re-initialised for every query.
  // One wchar_t is held back because REG_SZ data is not guaranteed to be
  // NUL-terminated.
  DWORD size = static_cast<DWORD>((v.size() - 1) * sizeof(wchar_t));
  const LONG status = ::RegQueryValueExW(hKey, L"mecabrc", 0, &vt,
                                         reinterpret_cast<BYTE *>(v.get()),
                                         &size);
  ::RegCloseKey(hKey);
  if (status != ERROR_SUCCESS || vt != REG_SZ) {
    return false;
  }

  v.get()[size / sizeof(wchar_t)] = L'\0';  // index <= v.size() - 1
  *rcfile = WideToUtf8(v.get());
  return true;
}
#endif

// Locates the rc file, loads it into `param`, then loads the dictionary
// resource file (DICRC) from the configured dictionary directory.
//
// The rc file path is taken from the first non-empty source, in this order:
//   1. the "rcfile" parameter,
//   2. $HOME/.mecabrc, if that file can be opened (HAVE_GETENV),
//   3. the MECABRC environment variable (HAVE_GETENV),
//   4. on native Windows: GetEnvironmentVariableW(L"MECABRC"), the "mecabrc"
//      value under HKLM\software\mecab, then HKCU\software\mecab, then a
//      file named "mecabrc" next to the module identified by DllInstance,
//   5. MECAB_DEFAULT_RC.
//
// Returns false when either the rc file or the DICRC file fails to load,
// otherwise true.
//
// NOTE(review): the "//debug" traces below write to std::cout on every call;
// they are kept unchanged here, but consider removing them or routing them to
// a logger so they do not mix with normal program output.
bool load_dictionary_resource(Param *param) {

  //debug
  std::cout << "[" << __FILE__ << ":" << __LINE__ << "]: "
            << "load_dictionary_resource(Param *param)" << std::endl;

  ///

  std::string rcfile = param->get<std::string>("rcfile");

#ifdef HAVE_GETENV
  if (rcfile.empty()) {

    //debug
    std::cout << "[" << __FILE__ << ":" << __LINE__ << "]: "
              << "rcfile.empty()" << std::endl;

    ///

    const char *homedir = getenv("HOME");
    if (homedir) {
      const std::string s = MeCab::create_filename(std::string(homedir),
                                                   ".mecabrc");
      // Only accept the per-user file if it can actually be opened.
      std::ifstream ifs(WPATH(s.c_str()));
      if (ifs) {
        rcfile = s;
      }
    }
  }

  if (rcfile.empty()) {
    const char *rcenv = getenv("MECABRC");
    if (rcenv) {
      rcfile = rcenv;
    }
  }
#endif

#if defined (HAVE_GETENV) && defined(_WIN32) && !defined(__CYGWIN__)
  if (rcfile.empty()) {
    scoped_fixed_array<wchar_t, BUF_SIZE> buf;
    const DWORD len = ::GetEnvironmentVariableW(L"MECABRC",
                                                buf.get(),
                                                buf.size());
    // len >= buf.size() means the buffer was too small; 0 means not found.
    if (len < buf.size() && len > 0) {
      rcfile = WideToUtf8(buf.get());
    }
  }
#endif

#if defined(_WIN32) && !defined(__CYGWIN__)
  // Machine-wide registry setting first, then the per-user one.
  if (rcfile.empty()) {
    read_mecabrc_from_registry_w(HKEY_LOCAL_MACHINE, &rcfile);
  }

  if (rcfile.empty()) {
    read_mecabrc_from_registry_w(HKEY_CURRENT_USER, &rcfile);
  }

  // Last Windows-specific fallback: a "mecabrc" file beside the module.
  if (rcfile.empty()) {
    scoped_fixed_array<wchar_t, BUF_SIZE> v;
    // GetModuleFileNameW takes the buffer size in characters, not bytes.
    const DWORD capacity = static_cast<DWORD>(v.size());
    const DWORD len = ::GetModuleFileNameW(DllInstance, v.get(), capacity);
    // 0 is failure; a return equal to the capacity means the path was
    // truncated, so it is rejected too.
    if (len != 0 && len < capacity) {
      scoped_fixed_array<wchar_t, _MAX_DRIVE> drive;
      // The directory component needs _MAX_DIR characters, not _MAX_DRIVE.
      scoped_fixed_array<wchar_t, _MAX_DIR> dir;
      _wsplitpath(v.get(), drive.get(), dir.get(), NULL, NULL);
      const std::wstring path =
          std::wstring(drive.get()) + std::wstring(dir.get()) + L"mecabrc";
      if (::GetFileAttributesW(path.c_str()) != INVALID_FILE_ATTRIBUTES) {
        rcfile = WideToUtf8(path);
      }
    }
  }
#endif

  if (rcfile.empty()) {
    rcfile = MECAB_DEFAULT_RC;
  }

  if (!param->load(rcfile.c_str())) {
    return false;
  }

  std::string dicdir = param->get<std::string>("dicdir");
  if (dicdir.empty()) {
    dicdir = ".";  // current
  }
  // rcfile is passed through remove_filename() and then substituted for the
  // "$(rcpath)" placeholder in dicdir.
  remove_filename(&rcfile);
  replace_string(&dicdir, "$(rcpath)", rcfile);
  param->set<std::string>("dicdir", dicdir, true);
  dicdir = create_filename(dicdir, DICRC);

  if (!param->load(dicdir.c_str())) {
    return false;
  }

  return true;
}
Ejemplo n.º 5
0
 // Calls remove_filename() on this object, then appends `replacement` through
 // the base class's operator/=. Returns *this so calls can be chained.
 path & replace_filename (path const & replacement)
 {
     // Step 1: run remove_filename() on the current value.
     remove_filename();

     // Step 2: append the replacement via the base-class operator/=.
     base_class::operator/=(replacement);

     return *this;
 }