/**
 * Creates the IRST language model object for m_filePath, loads it, and
 * initialises the members derived from it (dictionary, n-gram order,
 * unknown-word id, factors, caches, OOV penalty).
 *
 * \param opts not used by this implementation
 */
void LanguageModelIRST::Load(AllOptions::ptr const& opts)
{
  FactorCollection &factors = FactorCollection::Instance();

  // NOTE(review): CreateLanguageModel() is reached through m_lmtb before this
  // function has assigned it -- presumably a static factory; confirm.
  m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
  if (m_lmtb_size > 0) {
    // Only a positive configured size is forwarded as the maximum loaded level.
    m_lmtb->setMaxLoadedLevel(m_lmtb_size);
  }
  m_lmtb->load(m_filePath);

  d = m_lmtb->getDict();
  d->incflag(1);

  // From here on both the stored size and the n-gram order are whatever
  // level the loaded model reports.
  m_lmtb_size = m_lmtb->maxlevel();
  m_nGramOrder = m_lmtb_size;

  // The LM may be fine even if loading printed warnings.
  // (Mauro: these two assignments were in the wrong order in the original.)
  m_unknownId = d->oovcode();  // at the level of micro tags
  m_empty = -1;                // code for an empty position

  CreateFactors(factors);

  VERBOSE(1, GetScoreProducerDescription()
          << " LanguageModelIRST::Load() m_unknownId=" << m_unknownId << std::endl);

  // Install caches to save time (only effective when PS_CACHE_ENABLE is
  // defined through compilation flags).
  m_lmtb->init_caches(m_lmtb_size > 2 ? m_lmtb_size - 1 : 2);

  if (m_lmtb_dub > 0) {
    m_lmtb->setlogOOVpenalty(m_lmtb_dub);
  }
}
/**
 * Constructs the feature with two score components and parses its
 * parameters. Progress is written unconditionally to std::cerr.
 *
 * \param line configuration line forwarded to StatelessFeatureFunction
 */
SourceGHKMTreeInputMatchFeature::SourceGHKMTreeInputMatchFeature(const std::string &line)
  : StatelessFeatureFunction(2, line)
{
  std::cerr << GetScoreProducerDescription() << "Initializing feature...";

  ReadParameters();

  std::cerr << " Done." << std::endl;
}
/**
 * Constructs the feature with a single score component and parses its
 * parameters, logging start and end of initialisation at verbosity 1.
 *
 * \param line configuration line forwarded to StatelessFeatureFunction
 */
Model1Feature::Model1Feature(const std::string &line)
  : StatelessFeatureFunction(1, line)
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");

  ReadParameters();

  VERBOSE(1, " Done.");
}
void FeatureFunction::SetParameter(const std::string& key, const std::string& value) { if (key == "tuneable") { m_tuneable = Scan<bool>(value); } else if (key == "tuneable-components") { UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription() << ": tuneable-components cannot be set if tuneable=false"); SetTuneableComponents(value); } else if (key == "require-sorting-after-source-context") { m_requireSortingAfterSourceContext = Scan<bool>(value); } else if (key == "verbosity") { m_verbosity = Scan<size_t>(value); } else if (key == "filterable") { //ignore } else { UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value); } }
/**
 * Constructs the IRST language model wrapper.
 *
 * Throws runtime_error unless the configured thread count is exactly 1,
 * then parses the parameters and logs the resulting settings at verbosity 4.
 *
 * \param line configuration line forwarded to LanguageModelSingleFactor
 */
LanguageModelIRST::LanguageModelIRST(const std::string &line)
  : LanguageModelSingleFactor(line)
  , m_lmtb_dub(0)
  , m_lmtb_size(0)
{
  const int threadCount = StaticData::Instance().ThreadCount();
  if (threadCount != 1) {
    const std::string message = "Error: " + SPrint(threadCount)
                                + " number of threads specified but IRST LM is not threadsafe.";
    throw runtime_error(message);
  }

  ReadParameters();

  // Dump the effective configuration for debugging.
  VERBOSE(4, GetScoreProducerDescription()
          << " LanguageModelIRST::LanguageModelIRST() m_lmtb_dub:|" << m_lmtb_dub << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription()
          << " LanguageModelIRST::LanguageModelIRST() m_filePath:|" << m_filePath << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription()
          << " LanguageModelIRST::LanguageModelIRST() m_factorType:|" << m_factorType << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription()
          << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
}
void Model1Feature::Load() { FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading source vocabulary from file " << m_fileNameVcbS << " ..."); Model1Vocabulary vcbS; vcbS.Load(m_fileNameVcbS); FEATUREVERBOSE2(2, " Done." << std::endl); FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading target vocabulary from file " << m_fileNameVcbT << " ..."); Model1Vocabulary vcbT; vcbT.Load(m_fileNameVcbT); FEATUREVERBOSE2(2, " Done." << std::endl); FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading model 1 lexical translation table from file " << m_fileNameModel1 << " ..."); m_model1.Load(m_fileNameModel1,vcbS,vcbT); FEATUREVERBOSE2(2, " Done." << std::endl); FactorCollection &factorCollection = FactorCollection::Instance(); m_emptyWord = factorCollection.GetFactor(Model1Vocabulary::GIZANULL,false); UTIL_THROW_IF2(m_emptyWord==NULL, GetScoreProducerDescription() << ": Factor for GIZA empty word does not exist."); }
void FeatureFunction::SetTuneableComponents(const std::string& value) { std::vector<std::string> toks = Tokenize(value,","); UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription() << ": Empty tuneable-components"); UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription() << ": tuneable-components value has to be a comma-separated list of " << m_numScoreComponents << " boolean values"); m_tuneableComponents.resize(m_numScoreComponents); m_numTuneableComponents = m_numScoreComponents; for (size_t i = 0; i < toks.size(); ++i) { m_tuneableComponents[i] = Scan<bool>(toks[i]); if (!m_tuneableComponents[i]) { --m_numTuneableComponents; } } }
/**
 * Constructs the feature with a single score component.
 *
 * Defaults set here: m_glueRules = false, m_nonGlueRules = true and glue
 * target LHS string "Q". After the parameters are parsed, the (possibly
 * overridden) LHS string is registered with the factor collection.
 *
 * \param line configuration line forwarded to StatelessFeatureFunction
 */
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
  : StatelessFeatureFunction(1, line)
  , m_glueRules(false)
  , m_nonGlueRules(true)
  , m_glueTargetLHSStr("Q")
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");

  ReadParameters();

  // Register the factor only after ReadParameters(), so it uses the final string.
  m_glueTargetLHS = FactorCollection::Instance().AddFactor(m_glueTargetLHSStr, true);

  VERBOSE(1, " Done.");
}
/**
 * Constructs the feature with zero score components passed to the base class.
 *
 * Defaults set here: unrestricted and simple are on; source context, target
 * context, domain trigger and ignore-punctuation are off. After the
 * parameters are parsed, the active modes are logged at verbosity 1 and,
 * if punctuation is to be ignored, m_punctuationHash is filled.
 *
 * \param line configuration line forwarded to StatelessFeatureFunction
 */
WordTranslationFeature::WordTranslationFeature(const std::string &line)
  : StatelessFeatureFunction(0, line)
  , m_unrestricted(true)
  , m_simple(true)
  , m_sourceContext(false)
  , m_targetContext(false)
  , m_domainTrigger(false)
  , m_ignorePunctuation(false)
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
  ReadParameters();

  if (m_simple == 1) {
    VERBOSE(1, " Using simple word translations.");
  }
  if (m_sourceContext == 1) {
    VERBOSE(1, " Using source context.");
  }
  if (m_targetContext == 1) {
    VERBOSE(1, " Using target context.");
  }
  if (m_domainTrigger == 1) {
    VERBOSE(1, " Using domain triggers.");
  }

  // Compile the list of punctuation characters.
  if (m_ignorePunctuation) {
    VERBOSE(1, " Ignoring punctuation for triggers.");

    // NOTE(review): the literal holds non-ASCII characters and is walked one
    // char at a time, so their bytes are presumably hashed individually -- confirm.
    const char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
    const size_t numChars = sizeof(punctuation) - 1;  // skip the terminating NUL
    for (size_t pos = 0; pos < numChars; ++pos) {
      m_punctuationHash[punctuation[pos]] = 1;
    }
  }

  VERBOSE(1, " Done." << std::endl);

  // TODO not sure about this
  /*
  if (weight[0] != 1) {
    AddSparseProducer(wordTranslationFeature);
    VERBOSE(1, "wt sparse producer weight: " << weight[0] << std::endl);
    if (m_mira)
      m_metaFeatureProducer = new MetaFeatureProducer("wt");
  }

  if (m_parameter->GetParam("report-sparse-features").size() > 0) {
    wordTranslationFeature->SetSparseFeatureReporting();
  }
  */
}
/**
 * Rejects every parameter: unconditionally raises util::Exception naming
 * the offending key and value.
 *
 * \param key   name of the setting
 * \param value textual value of the setting
 */
void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const std::string& value)
{
  UTIL_THROW(util::Exception, GetScoreProducerDescription()
             << ": Unknown parameter " << key << "=" << value);
}