bool Dictionary::open(const char *file, const char *mode) { filename_ = file; MMAP_OPEN(char, dmmap_, filename_, mode); CHECK_CLOSE_FALSE(dmmap_->size() >= 100) << "dictionary file is broken: " << file; const char *ptr = dmmap_->begin(); unsigned int dsize; unsigned int tsize; unsigned int fsize; unsigned int magic; unsigned int dummy; read_static<unsigned int>(&ptr, magic); CHECK_CLOSE_FALSE((magic ^ DictionaryMagicID) == dmmap_->size()) << "dictionary file is broken: " << file; read_static<unsigned int>(&ptr, version_); CHECK_CLOSE_FALSE(version_ == DIC_VERSION) << "incompatible version: " << version_; read_static<unsigned int>(&ptr, type_); read_static<unsigned int>(&ptr, lexsize_); read_static<unsigned int>(&ptr, lsize_); read_static<unsigned int>(&ptr, rsize_); read_static<unsigned int>(&ptr, dsize); read_static<unsigned int>(&ptr, tsize); read_static<unsigned int>(&ptr, fsize); read_static<unsigned int>(&ptr, dummy); charset_ = ptr; ptr += 32; da_.set_array(reinterpret_cast<void *>(const_cast<char*>(ptr))); ptr += dsize; token_ = reinterpret_cast<const Token *>(ptr); ptr += tsize; feature_ = ptr; ptr += fsize; CHECK_CLOSE_FALSE(ptr == dmmap_->end()) << "dictionary file is broken: " << file; return true; }
// Configures the tagger from `param`.
//
// Reads the "nbest", "verbose", "model" and "cost-factor" settings, creates
// a DecoderFeatureIndex for the model file and stores the resulting label
// count in ysize_.
//
// Returns false when param->help_version() returns false, when the model
// cannot be opened, or when the cost factor is not positive; true otherwise.
bool TaggerImpl::open(Param *param) {
  // Start from a clean state.
  close();

  if (!param->help_version()) {
    close();
    return false;
  }

  nbest_ = param->get<int>("nbest");
  vlevel_ = param->get<int>("verbose");

  const std::string model = param->get<std::string>("model");

  feature_index_ = new DecoderFeatureIndex();
  CHECK_CLOSE_FALSE(feature_index_->open(model.c_str(), 0))
      << feature_index_->what();

  const double cost_factor = param->get<double>("cost-factor");
  if (cost_factor <= 0.0) {
    WHAT << "cost factor must be positive";
    close();
    return false;
  }

  feature_index_->set_cost_factor(cost_factor);
  ysize_ = feature_index_->ysize();

  return true;
}
// Opens every resource the tokenizer needs, driven by the settings in
// `param`:
//   - the unknown-word dictionary and the character property table,
//   - the system dictionary under "dicdir" (must report type() == 0),
//   - zero or more user dictionaries from the comma-separated "userdic"
//     setting (each must report type() == 1 and be compatible with the
//     system dictionary),
//   - the linked list of DictionaryInfo records, in the same order as dic_,
//   - the unknown-word token table, one entry per property category,
//   - the BOS / unknown-word feature strings and the max grouping size.
//
// Returns true on success. Each CHECK_CLOSE_FALSE guards one step.
// NOTE(review): the macro is defined elsewhere; presumably it records the
// streamed message, calls close() and returns false -- confirm.
// NOTE(review): the `template <...>` header for this member definition is
// not visible in this chunk and is expected to precede it.
bool TokenizerImpl<N, P>::open(const Param &param) {
  close();

  const std::string prefix = param.template get<std::string>("dicdir");

  // "r+" lets the dictionaries be modified through the mapping.
  const char *mode = param.template get<bool>("open-mutable-dictionary") ?
      "r+" : "r";

  CHECK_CLOSE_FALSE(unkdic_.open(create_filename
                                 (prefix, UNK_DIC_FILE).c_str(), mode))
      << unkdic_.what();
  CHECK_CLOSE_FALSE(property_.open(param)) << property_.what();

  // NOTE(review): until sysdic is pushed into dic_ below, a failing check
  // leaves this allocation reachable only through the local pointer; whether
  // it is released then cannot be determined from this chunk -- possible leak.
  Dictionary *sysdic = new Dictionary;

  CHECK_CLOSE_FALSE(sysdic->open
                    (create_filename(prefix, SYS_DIC_FILE).c_str(), mode))
      << sysdic->what();

  CHECK_CLOSE_FALSE(sysdic->type() == 0)
      << "not a system dictionary: " << prefix;

  property_.set_charset(sysdic->charset());
  dic_.push_back(sysdic);

  const std::string userdic = param.template get<std::string>("userdic");
  if (!userdic.empty()) {
    char buf[BUF_SIZE];
    char *_dic[BUF_SIZE];

    // strncpy() does not NUL-terminate when the source fills the buffer, so
    // refuse lists that do not fit instead of tokenizing an unterminated
    // (and silently truncated) string.
    CHECK_CLOSE_FALSE(userdic.size() < sizeof(buf))
        << "userdic is too long: " << userdic;
    std::strncpy(buf, userdic.c_str(), sizeof(buf));

    // The limit is a number of output slots, not a number of bytes.
    size_t n = tokenizeCSV(buf, _dic, sizeof(_dic) / sizeof(_dic[0]));

    for (size_t i = 0; i < n; ++i) {
      // NOTE(review): same ownership caveat as for sysdic above.
      Dictionary *d = new Dictionary;
      CHECK_CLOSE_FALSE(d->open(_dic[i], mode)) << d->what();
      CHECK_CLOSE_FALSE(d->type() == 1)
          << "not a user dictionary: " << _dic[i];
      CHECK_CLOSE_FALSE(sysdic->isCompatible(*d))
          << "incompatible dictionary: " << _dic[i];
      dic_.push_back(d);
    }
  }

  // Rebuild the DictionaryInfo list. Walking dic_ backwards while prepending
  // yields a list whose head describes dic_[0].
  dictionary_info_ = 0;
  dictionary_info_freelist_.free();
  for (int i = static_cast<int>(dic_.size() - 1); i >= 0; --i) {
    DictionaryInfo *d = dictionary_info_freelist_.alloc();
    d->next          = dictionary_info_;
    d->filename      = dic_[i]->filename();
    d->charset       = dic_[i]->charset();
    d->size          = dic_[i]->size();
    d->lsize         = dic_[i]->lsize();
    d->rsize         = dic_[i]->rsize();
    d->type          = dic_[i]->type();
    d->version       = dic_[i]->version();
    dictionary_info_ = d;
  }

  // Every category known to the property table must have an entry in the
  // unknown-word dictionary.
  unk_tokens_.clear();
  for (size_t i = 0; i < property_.size(); ++i) {
    const char *key = property_.name(i);
    Dictionary::result_type n = unkdic_.exactMatchSearch(key);
    CHECK_CLOSE_FALSE(n.value != -1) << "cannot find UNK category: " << key;
    const Token *token = unkdic_.token(n);
    size_t size = unkdic_.token_size(n);
    unk_tokens_.push_back(std::make_pair(token, size));
  }

  space_ = property_.getCharInfo(0x20);  // ad-hoc

  bos_feature_.reset_string(param.template get<std::string>("bos-feature"));

  // The unknown-word feature is optional; it stays reset to 0 when unset.
  const std::string tmp = param.template get<std::string>("unk-feature");
  unk_feature_.reset(0);
  if (!tmp.empty()) {
    unk_feature_.reset_string(tmp);
  }

  CHECK_CLOSE_FALSE(*bos_feature_ != '\0')
      << "bos-feature is undefined in dicrc";

  // A configured value of 0 selects the built-in default.
  max_grouping_size_ = param.template get<size_t>("max-grouping-size");
  if (max_grouping_size_ == 0) {
    max_grouping_size_ = DEFAULT_MAX_GROUPING_SIZE;
  }

  return true;
}