/** * \brief Private constructor with ucam::util::RegistryPO object and index to a particular language model. * This constructor is only used when several language models are loaded. * If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25 ). * The second and following instances of LoadLanguageModelTask will be created using the private constructor. * In the constructor itself the next language model loader task is appended. * this constructor, which has an index to the actual language model that must be loaded. * \param rg : ucam::util::RegistryPO object, containing user parameters. * \param index : Index to the actual language model. * \param lmload : key word to access the registry object for language models * \param lmscale : key word to access the registry object for language model scales. */ LoadLanguageModelTask ( const ucam::util::RegistryPO& rg , uint index , const std::string& lmload = HifstConstants::kLmLoad, const std::string& lmscale = HifstConstants::kLmFeatureweights , const std::string& lmwp = HifstConstants::kLmWordPenalty, const std::string& wordmapkey = HifstConstants::kLmWordmap ) : rg_ ( rg ), lmkey_ ( lmload ), previous_ ( "" ), built_ ( false ), index_ ( index ), isintegermapped_ (!rg.exists (wordmapkey) || rg.get<std::string> (wordmapkey) == ""), wordmapkey_ (wordmapkey), lmfile_ ( rg.getVectorString ( lmload , index ) ) { LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale << ", and key " << lmkey_ << ",index=" << index_ << ",wordmapkey=" << wordmapkey_); setLanguageModelScale ( lmscale ); setLanguageModelWordPenalty ( lmwp ); if ( rg.getVectorString ( lmload ).size() > index_ + 1 ) { LDEBUG ( "Appending Language model..." ); this->appendTask ( new LoadLanguageModelTask ( rg, index_ + 1, lmload, lmscale , lmwp , wordmapkey ) ); } LDEBUG ( "." ); };
///Constructor with registry object and several keys to access data object and registry HiFSTTask ( const ucam::util::RegistryPO& rg , const std::string& outputkey = HifstConstants::kHifstLatticeStore, const std::string& locallmkey = HifstConstants::kHifstLocalpruneLmLoad, const std::string& fullreferencelatticekey = HifstConstants::kReferencefilterNosubstringStore , const std::string& lmkey = HifstConstants::kLmLoad ) : optimize_ (rg.getBool (HifstConstants::kHifstOptimizecells) ), numlocallm_ (rg.getVectorString (locallmkey).size() ), warned_ (false), rtnfiles_ (rg.get<std::string> (HifstConstants::kHifstWritertn) ), fullreferencelatticekey_ ( fullreferencelatticekey ), lmkey_ ( lmkey ), locallmkey_ ( locallmkey ), outputkey_ ( outputkey ), piscount_ ( 0 ), aligner_ ( rg.getBool ( HifstConstants::kHifstAlilatsmode ) ), // cellredm_ ( rg.getBool ( "hifst.cellredm" ) ), // finalredm_ ( rg.getBool ( "hifst.finalredm" ) ), hipdtmode_ (rg.getBool (HifstConstants::kHifstUsepdt) ), rtnopt_ (rg.getBool (HifstConstants::kHifstRtnopt) ), replacefstbyarc_ ( rg.getSetString ( HifstConstants::kHifstReplacefstbyarcNonterminals ) ), replacefstbyarcexceptions_ ( rg.getSetString ( HifstConstants::kHifstReplacefstbyarcExceptions ) ), replacefstbynumstates_ ( rg.get<unsigned> ( HifstConstants::kHifstReplacefstbyarcNumstates ) ), localprune_ ( rg.getBool ( HifstConstants::kHifstLocalpruneEnable ) ), pruneweight_ ( rg.get<float> ( HifstConstants::kHifstPrune ) ), numstatesthreshold_ ( rg.get<unsigned> ( HifstConstants::kHifstLocalpruneNumstates ) ), lpctuples_ ( rg.getVectorString ( HifstConstants::kHifstLocalpruneConditions ) ) { LINFO ("Number of local language models=" << numlocallm_); LINFO ("aligner mode=" << aligner_); LINFO ("localprune mode=" << localprune_); USER_CHECK ( ! ( lpc_.size() % 4 ), "local pruning conditions are defined by tuples of 4 elements: category,x,y,Number-of-states. Category is a string and x,y are int. Number of states is unsigned" ); USER_CHECK ( (localprune_ && numlocallm_) || ( localprune_ && !numlocallm_ && aligner_ ) || (!localprune_) , "If you want to do cell pruning in translation, you should use a language model for local pruning. Check --hifst.localprune.lm.load and --hifst.localprune.enable.\n"); optimize.setAlignMode (aligner_); if (hipdtmode_) { LINFO ("Hipdt mode enabled!"); } if (!rtnopt_) { LINFO ("RTN openfst optimizations will not be applied"); } LDEBUG ( "Hifst constructor done!" ); };
/** * \brief Public constructor. If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25 ), * the second and following instances of LoadLanguageModelTask will be created using the private constructor (see below), which has an index to the actual language model that must be loaded. * For the public constructor, the index is set to 0. * \param rg ucam::util::RegistryPO object, containing user parameters. * \param lmload key word to access the registry object for language models * \param lmscale key word to access the registry object for language model scales. * \param forceone To force the loading of only one language model (i.e. lm1 with scale 0.25). */ LoadLanguageModelTask ( const ucam::util::RegistryPO& rg , const std::string& lmload = HifstConstants::kLmLoad , const std::string& lmscale = HifstConstants::kLmFeatureweights //if rg.get(lmscale)=="" the scale will default to 1 , const std::string& lmwp = HifstConstants::kLmWordPenalty //if rg.get(wps)=="" the scale will default to 0 , const std::string& wordmapkey = HifstConstants::kLmWordmap , bool forceone = false ) : rg_ ( rg ) , lmkey_ ( lmload ) , previous_ ( "" ) , built_ ( false ) , index_ ( 0 ) , isintegermapped_ (!rg.exists (wordmapkey) || rg.get<std::string> (wordmapkey) == "") , wordmapkey_ (wordmapkey) , lmfile_ ( rg.getVectorString ( lmload , 0 ) ) { LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale << "/" << lmwp << ", and key " << lmkey_ << ",index=" << index_ << ",wordmap=" << wordmapkey_); FORCELINFO("Language model loader for " << lmfile_() ); setLanguageModelScale ( lmscale ); setLanguageModelWordPenalty ( lmwp ); if ( rg_.getVectorString ( lmload ).size() > 1 ) { if ( !forceone ) { LINFO ( "Appending Language model..." ); this->appendTask ( new LoadLanguageModelTask ( rg_, 1, lmload, lmscale , lmwp , wordmapkey ) ); } else { LWARN ( "Only one loaded for " << lmload << ". Extra language models are being ignored" ); } } LINFO ( "Finished constructor!" ); };