Beispiel #1
0
/**
 * Build a KenLM feature function from a space-separated "KENLM key=value ..."
 * configuration line.
 *
 * The keys factor, order, path, lazyken and load are consumed here; every
 * other key=value pair is appended to a fresh "KENLM" line that is handed on
 * to the overload taking the already-extracted path, factor and load method.
 *
 * @param startInd  forwarded unchanged to the delegated ConstructKenLM overload
 * @param lineOrig  the original feature line; its first token is skipped
 * @return whatever the delegated ConstructKenLM overload returns
 *
 * Raises via UTIL_THROW_IF2 when an argument has no '=', and via UTIL_THROW2
 * when the value of "load" is not one of the recognised method names.
 */
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
{
  FactorType factorType = 0;
  std::string filePath;
  util::LoadMethod load_method = util::POPULATE_OR_READ;

  util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
  ++argument; // skip the leading KENLM token

  // Arguments that are not handled here get re-assembled into this line.
  util::StringStream line;
  line << "KENLM";

  for (; argument; ++argument) {
    const char *const tokenBegin = argument->data();
    const char *const tokenEnd = tokenBegin + argument->size();
    const char *const equals = std::find(tokenBegin, tokenEnd, '=');
    UTIL_THROW_IF2(equals == tokenEnd,
                   "Expected = in KenLM argument " << *argument);

    // Split the token around the first '='.
    StringPiece name(tokenBegin, equals - tokenBegin);
    StringPiece value(equals + 1, tokenEnd - equals - 1);

    if (name == "factor") {
      factorType = boost::lexical_cast<FactorType>(value);
      continue;
    }
    if (name == "order") {
      // Ignored
      continue;
    }
    if (name == "path") {
      filePath.assign(value.data(), value.size());
      continue;
    }
    if (name == "lazyken") {
      // deprecated: use load instead.
      if (boost::lexical_cast<bool>(value)) {
        load_method = util::LAZY;
      } else {
        load_method = util::POPULATE_OR_READ;
      }
      continue;
    }
    if (name != "load") {
      // pass to base class to interpret
      line << " " << name << "=" << value;
      continue;
    }

    // name == "load": translate the textual method into util::LoadMethod.
    if (value == "lazy") {
      load_method = util::LAZY;
    } else if (value == "populate_or_lazy") {
      load_method = util::POPULATE_OR_LAZY;
    } else if (value == "populate_or_read" || value == "populate") {
      load_method = util::POPULATE_OR_READ;
    } else if (value == "read") {
      load_method = util::READ;
    } else if (value == "parallel_read") {
      load_method = util::PARALLEL_READ;
    } else {
      UTIL_THROW2("Unknown KenLM load method " << value);
    }
  }

  return ConstructKenLM(startInd, line.str(), filePath, factorType, load_method);
}
Beispiel #2
0
/**
 * Create and load a language model of the requested implementation type.
 *
 * Ken / LazyKen (and LDHTLM when LM_LDHT is defined) are delegated to their
 * own constructors and returned directly. All other types are instantiated
 * here (only if the matching LM_* macro was defined at compile time, except
 * ORLM which is always available), loaded from languageModelFile, and wrapped
 * in an LMRefCount.
 *
 * @param lmImplementation   which language model implementation to create
 * @param factorTypes        factors the model scores; factorTypes[0] is read
 *                           for Ken, LDHTLM and single-factor models, so the
 *                           vector is assumed to be non-empty
 * @param nGramOrder         n-gram order passed to Load()
 * @param languageModelFile  path handed to the model's constructor / Load()
 * @param dub                passed to the LanguageModelIRST constructor only
 * @return the created model, or NULL if the type is unknown / not compiled
 *         in, or if loading the model file failed
 */
LanguageModel* CreateLanguageModel(LMImplementation lmImplementation
                                   , const std::vector<FactorType> &factorTypes
                                   , size_t nGramOrder
                                   , const std::string &languageModelFile
                                   , int dub)
{

    LOGE("[mgjang] in CreateLanguageModel function \n");

    // KenLM builds and loads its own model object; no LMRefCount wrapping here.
    if (lmImplementation == Ken || lmImplementation == LazyKen) {
        return ConstructKenLM(languageModelFile, factorTypes[0], lmImplementation == LazyKen);
    }
    LanguageModelImplementation *lm = NULL;
    // Each optional backend is only instantiated when its LM_* macro is
    // defined; otherwise lm stays NULL and is reported below.
    switch (lmImplementation) {
    case RandLM:
#ifdef LM_RAND
        lm = NewRandLM();
#endif
        break;
    case ORLM:
        lm = new LanguageModelORLM();
        break;
    case Remote:
#ifdef LM_REMOTE
        lm = new LanguageModelRemote();
#endif
        break;

    case SRI:
#ifdef LM_SRI
        lm = new LanguageModelSRI();
#endif
        break;
    case IRST:
        LOGE("[mgjang] case IRST\n");
#ifdef LM_IRST
        LOGE("[mgjang] check definition IRST\n");
        lm = new LanguageModelIRST(dub);
#endif
        break;
    case Joint:
#ifdef LM_SRI
        lm = new LanguageModelJoint(new LanguageModelSRI());
#endif
        break;
    case ParallelBackoff:
#ifdef LM_SRI
        lm = NewParallelBackoff();
#endif
        break;
    case LDHTLM:
#ifdef LM_LDHT
        // NOTE(review): scoreIndexManager is not declared in this function;
        // presumably it comes from an enclosing scope — confirm it resolves
        // when LM_LDHT is enabled.
        return ConstructLDHTLM(languageModelFile,
                               scoreIndexManager,
                               factorTypes[0]);
#endif
        break;
    default:
        break;
    }

    if (lm == NULL) {
        UserMessage::Add("Language model type unknown. Probably not compiled into library");
        LOGE("[mgjang] Language model type unknown. Probably not compiled into library\n");
        return NULL;
    } else {
        LOGE("[mgjang] LM instance is created\n");
        // Load the model file through the interface matching the LM's type.
        // Any other LM type is wrapped below without a Load() call.
        switch (lm->GetLMType()) {
        case SingleFactor:
            LOGE("[mgjang] case SingleFactor\n");
            if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], nGramOrder)) {
                cerr << "single factor model failed" << endl;
                delete lm;
                lm = NULL;
                LOGE("[mgjang] single factor model failed\n");
            }
            break;
        case MultiFactor:
            LOGE("[mgjang] case MultiFactor\n");
            if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, nGramOrder)) {
                cerr << "multi factor model failed" << endl;
                delete lm;
                lm = NULL;
                LOGE("[mgjang] multi factor model failed\n");
            }
            break;
        }
    }

    // A failed Load() has already deleted the implementation. Report the
    // failure as NULL instead of handing back an LMRefCount around a NULL
    // model, which the caller could not detect with a NULL check.
    if (lm == NULL) {
        return NULL;
    }

    return new LMRefCount(lm);
}