/**
 * Build a KenLM feature function from a raw feature line.
 *
 * The line is split on single spaces. The first token is skipped (it is the
 * feature name) and every following token must have the form key=value.
 * The keys handled here are:
 *   - factor  : parsed with boost::lexical_cast into a FactorType
 *   - order   : accepted and ignored
 *   - path    : copied into the model file path
 *   - lazyken : deprecated boolean; true selects util::LAZY, false selects
 *               util::POPULATE_OR_READ
 *   - load    : one of lazy, populate_or_lazy, populate_or_read, populate,
 *               read, parallel_read
 * Any other key=value pair is appended to a rebuilt "KENLM ..." line that is
 * handed to the five-argument ConstructKenLM overload together with the
 * extracted settings.
 *
 * \param startInd  forwarded unchanged to the five-argument overload
 * \param lineOrig  the raw feature line
 * \return whatever the five-argument ConstructKenLM overload returns
 *
 * Raises via UTIL_THROW_IF2 / UTIL_THROW2 when a token has no '=' or when the
 * value of "load" is not one of the recognised spellings.
 */
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
{
  // Recognised spellings for load=..., checked in this order.
  struct LoadSpelling {
    const char *text;
    util::LoadMethod method;
  };
  static const LoadSpelling kLoadSpellings[] = {
    { "lazy",             util::LAZY },
    { "populate_or_lazy", util::POPULATE_OR_LAZY },
    { "populate_or_read", util::POPULATE_OR_READ },
    { "populate",         util::POPULATE_OR_READ },
    { "read",             util::READ },
    { "parallel_read",    util::PARALLEL_READ }
  };
  static const size_t kNumLoadSpellings =
    sizeof(kLoadSpellings) / sizeof(kLoadSpellings[0]);

  FactorType factorType = 0;
  std::string filePath;
  util::LoadMethod loadMethod = util::POPULATE_OR_READ;

  util::TokenIter<util::SingleCharacter, true> token(lineOrig, ' ');
  ++token; // step over the leading "KENLM" token

  // Arguments that are not consumed here are re-emitted onto this line.
  util::StringStream line;
  line << "KENLM";

  for (; token; ++token) {
    const char *const first = token->data();
    const char *const last = first + token->size();
    const char *const eq = std::find(first, last, '=');
    UTIL_THROW_IF2(eq == last, "Expected = in KenLM argument " << *token);

    const StringPiece name(first, eq - first);
    const StringPiece value(eq + 1, last - eq - 1);

    if (name == "factor") {
      factorType = boost::lexical_cast<FactorType>(value);
      continue;
    }
    if (name == "order") {
      // Accepted for compatibility; the value is not used.
      continue;
    }
    if (name == "path") {
      filePath.assign(value.data(), value.size());
      continue;
    }
    if (name == "lazyken") {
      // Deprecated: "load" is the replacement for this switch.
      if (boost::lexical_cast<bool>(value)) {
        loadMethod = util::LAZY;
      } else {
        loadMethod = util::POPULATE_OR_READ;
      }
      continue;
    }
    if (name == "load") {
      bool known = false;
      for (size_t i = 0; i < kNumLoadSpellings; ++i) {
        if (value == kLoadSpellings[i].text) {
          loadMethod = kLoadSpellings[i].method;
          known = true;
          break;
        }
      }
      if (!known) {
        UTIL_THROW2("Unknown KenLM load method " << value);
      }
      continue;
    }

    // Not ours: leave it on the line for the base class to interpret.
    line << " " << name << "=" << value;
  }

  return ConstructKenLM(startInd, line.str(), filePath, factorType, loadMethod);
}
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation , const std::vector<FactorType> &factorTypes , size_t nGramOrder , const std::string &languageModelFile , int dub) { LOGE("[mgjang] in CreateLanguageModel function \n"); if (lmImplementation == Ken || lmImplementation == LazyKen) { return ConstructKenLM(languageModelFile, factorTypes[0], lmImplementation == LazyKen); } LanguageModelImplementation *lm = NULL; switch (lmImplementation) { case RandLM: #ifdef LM_RAND lm = NewRandLM(); #endif break; case ORLM: lm = new LanguageModelORLM(); break; case Remote: #ifdef LM_REMOTE lm = new LanguageModelRemote(); #endif break; case SRI: #ifdef LM_SRI lm = new LanguageModelSRI(); #endif break; case IRST: LOGE("[mgjang] case IRST\n"); #ifdef LM_IRST LOGE("[mgjang] check definition IRST\n"); lm = new LanguageModelIRST(dub); #endif break; case Joint: #ifdef LM_SRI lm = new LanguageModelJoint(new LanguageModelSRI()); #endif break; case ParallelBackoff: #ifdef LM_SRI lm = NewParallelBackoff(); #endif break; case LDHTLM: #ifdef LM_LDHT return ConstructLDHTLM(languageModelFile, scoreIndexManager, factorTypes[0]); #endif break; default: break; } if (lm == NULL) { UserMessage::Add("Language model type unknown. Probably not compiled into library"); LOGE("[mgjang] Language model type unknown. Probably not compiled into library\n"); return NULL; } else { LOGE("[mgjang] LM instance is created\n"); switch (lm->GetLMType()) { case SingleFactor: LOGE("[mgjang] case SingleFactor\n"); if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], nGramOrder)) { cerr << "single factor model failed" << endl; delete lm; lm = NULL; LOGE("[mgjang] single factor model failed\n"); } break; case MultiFactor: LOGE("[mgjang] case MultiFactor\n"); if (! 
static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, nGramOrder)) { cerr << "multi factor model failed" << endl; delete lm; lm = NULL; LOGE("[mgjang] multi factor model failed\n"); } break; } } return new LMRefCount(lm); }