/** * \brief Private constructor with ucam::util::RegistryPO object and index to a particular language model. * This constructor is only used when several language models are loaded. * If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25 ). * The second and following instances of LoadLanguageModelTask will be created using the private constructor. * In the constructor itself the next language model loader task is appended. * this constructor, which has an index to the actual language model that must be loaded. * \param rg : ucam::util::RegistryPO object, containing user parameters. * \param index : Index to the actual language model. * \param lmload : key word to access the registry object for language models * \param lmscale : key word to access the registry object for language model scales. */ LoadLanguageModelTask ( const ucam::util::RegistryPO& rg , uint index , const std::string& lmload = HifstConstants::kLmLoad, const std::string& lmscale = HifstConstants::kLmFeatureweights , const std::string& lmwp = HifstConstants::kLmWordPenalty, const std::string& wordmapkey = HifstConstants::kLmWordmap ) : rg_ ( rg ), lmkey_ ( lmload ), previous_ ( "" ), built_ ( false ), index_ ( index ), isintegermapped_ (!rg.exists (wordmapkey) || rg.get<std::string> (wordmapkey) == ""), wordmapkey_ (wordmapkey), lmfile_ ( rg.getVectorString ( lmload , index ) ) { LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale << ", and key " << lmkey_ << ",index=" << index_ << ",wordmapkey=" << wordmapkey_); setLanguageModelScale ( lmscale ); setLanguageModelWordPenalty ( lmwp ); if ( rg.getVectorString ( lmload ).size() > index_ + 1 ) { LDEBUG ( "Appending Language model..." ); this->appendTask ( new LoadLanguageModelTask ( rg, index_ + 1, lmload, lmscale , lmwp , wordmapkey ) ); } LDEBUG ( "." ); };
///Constructor with registry object, offset and keys LoadSparseWeightFlowerLatticeTask ( const ucam::util::RegistryPO& rg, const unsigned offset = 1, //minimum offset considering only one language model... const std::string& alignmentlattices = HifstConstants::kRuleflowerlatticeFilterbyalilats, const std::string& grammarloadkey = HifstConstants::kRuleflowerlatticeLoad, const std::string& grammarstorekey = HifstConstants::kRuleflowerlatticeStore ) : offset_ ( offset ), rg_ ( rg ), alilats_ ( rg.exists ( alignmentlattices ) ? rg.get<std::string> ( alignmentlattices ) : "" ), grammar_ ( rg.get<std::string> ( grammarloadkey ) ), fscales_ ( fst::TropicalSparseTupleWeight<float>::Params() ), filterbyalilats_ ( rg.exists ( alignmentlattices ) ), grammarstorekey_ ( grammarstorekey ) { };
///Constructor with ucam::util::RegistryPO object ApplyLanguageModelTask ( const ucam::util::RegistryPO& rg , const std::string& lmkey = HifstConstants::kLmLoad , const std::string& latticeloadkey = "lm.lattice.load", const std::string& latticestorekey = "lm.lattice.store", bool deletelmscores = false ) : lmkey_ ( lmkey ), latticeloadkey_ ( latticeloadkey ), latticestorekey_ ( latticestorekey ), natlog_ ( !rg.exists ( HifstConstants::kLmLogTen ) ), deletelmscores_ (deletelmscores) { };
/** * \brief Public constructor. If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25 ), * the second and following instances of LoadLanguageModelTask will be created using the private constructor (see below), which has an index to the actual language model that must be loaded. * For the public constructor, the index is set to 0. * \param rg ucam::util::RegistryPO object, containing user parameters. * \param lmload key word to access the registry object for language models * \param lmscale key word to access the registry object for language model scales. * \param forceone To force the loading of only one language model (i.e. lm1 with scale 0.25). */ LoadLanguageModelTask ( const ucam::util::RegistryPO& rg , const std::string& lmload = HifstConstants::kLmLoad , const std::string& lmscale = HifstConstants::kLmFeatureweights //if rg.get(lmscale)=="" the scale will default to 1 , const std::string& lmwp = HifstConstants::kLmWordPenalty //if rg.get(wps)=="" the scale will default to 0 , const std::string& wordmapkey = HifstConstants::kLmWordmap , bool forceone = false ) : rg_ ( rg ) , lmkey_ ( lmload ) , previous_ ( "" ) , built_ ( false ) , index_ ( 0 ) , isintegermapped_ (!rg.exists (wordmapkey) || rg.get<std::string> (wordmapkey) == "") , wordmapkey_ (wordmapkey) , lmfile_ ( rg.getVectorString ( lmload , 0 ) ) { LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale << "/" << lmwp << ", and key " << lmkey_ << ",index=" << index_ << ",wordmap=" << wordmapkey_); FORCELINFO("Language model loader for " << lmfile_() ); setLanguageModelScale ( lmscale ); setLanguageModelWordPenalty ( lmwp ); if ( rg_.getVectorString ( lmload ).size() > 1 ) { if ( !forceone ) { LINFO ( "Appending Language model..." ); this->appendTask ( new LoadLanguageModelTask ( rg_, 1, lmload, lmscale , lmwp , wordmapkey ) ); } else { LWARN ( "Only one loaded for " << lmload << ". 
Extra language models are being ignored" ); } } LINFO ( "Finished constructor!" ); };
/**
 * \brief Samples labelled feature vectors from the n-best lists of every
 * cached lattice in a tune set and writes them to the output files.
 *
 * For each cached lattice the function projects it, extracts the n shortest
 * paths, turns them into hypotheses and passes them to ProSBLEUSample. Every
 * returned sample is written as "<target> <features>" on its own line, where
 * the target is either the sample value or, with the binary-target option,
 * 1 for positive values and 0 otherwise. Empty lattices are reported and
 * skipped.
 *
 * \param rg ucam::util::RegistryPO object, containing user parameters.
 * \return 0 once all lattices have been processed.
 *         NOTE(review): the original fell off the end of this int function
 *         without returning a value; 0 is assumed to be the success code
 *         expected by callers -- confirm.
 */
int SampleWFSAs( ucam::util::RegistryPO const& rg) {
  using ucam::util::oszfstream;
  using ucam::util::PatternAddress;
  PatternAddress<unsigned> input(rg.get<std::string>(HifstConstants::kInput.c_str()));
  PatternAddress<unsigned> output(rg.get<std::string>(HifstConstants::kOutput.c_str()));
  unsigned n = rg.get<unsigned>(HifstConstants::kNbest.c_str());
  unsigned ns = rg.get<unsigned>(HifstConstants::kNSamples.c_str());
  float alpha = rg.get<float>(HifstConstants::kAlpha.c_str());
  bool negatives = rg.exists(HifstConstants::kNegativeExamples.c_str());
  bool binarytarget = rg.exists(HifstConstants::kBinaryTarget.c_str());
  bool negate = !rg.exists(HifstConstants::kDontNegate.c_str());
  std::string extTok = rg.getString(HifstConstants::kExternalTokenizer.c_str());
  std::string wMap = rg.getString(HifstConstants::kWordMap.c_str());
  bool printOutputLabels = rg.exists(HifstConstants::kPrintOutputLabels.c_str());

  // Reference files: integer references take precedence over word references
  // when both keys are present. intRefs is initialised explicitly so that it
  // is never read with an indeterminate value when neither key is set.
  std::string refFiles;
  bool intRefs = false;
  if (rg.exists(HifstConstants::kWordRefs)) {
    refFiles = rg.getString(HifstConstants::kWordRefs);
    intRefs = false;
  }
  if (rg.exists(HifstConstants::kIntRefs)) {
    refFiles = rg.getString(HifstConstants::kIntRefs);
    intRefs = true;
  }
  std::cerr << refFiles << "**" <<std::endl;

  ucam::fsttools::BleuScorer bleuScorer(refFiles, extTok, n, intRefs, wMap);
  ucam::fsttools::TuneSet< Arc > tuneSet(rg);
  ucam::fsttools::Bleu ibs = tuneSet.ComputeBleu(bleuScorer);
  FORCELINFO("Set level Bleu: " << ibs);

  // Seed the C random number generator: wall-clock time unless the user
  // supplied an explicit seed.
  unsigned seed = time(NULL);
  if (rg.exists(HifstConstants::kRandomSeed.c_str()))
    seed = rg.get<unsigned>(HifstConstants::kRandomSeed.c_str());
  FORCELINFO("random seed: " << seed);
  srand(seed);

  boost::scoped_ptr<oszfstream> out;
  std::string old;
  for (unsigned i=0; i<tuneSet.cachedLats.size(); i++) {
    fst::VectorFst<Arc> ifst(*tuneSet.cachedLats[i]);
    fst::VectorFst<Arc> nfst;
    // Only reopen the output stream when the expanded file name changes.
    if (old != output (i) ) {
      out.reset(new oszfstream (output(i)));
      old = output(i);
    }
    if (!ifst.NumStates() ) {
      FORCELINFO("EMPTY: " << i);
      continue;
    }
    // Projecting allows unique to work for all cases.
    fst::Project(&ifst, (printOutputLabels?PROJECT_OUTPUT:PROJECT_INPUT));
    ShortestPath (ifst, &nfst, n, true );
    std::vector<HypT> hyps;
    fst::printStrings<Arc> (nfst, &hyps);
    std::vector< LabeledFeature< float, typename Arc::Weight> > fea
      = ProSBLEUSample<typename Arc::Weight, HypT>(bleuScorer, hyps, i, n, ns,
          alpha, negatives, negate);
    for (unsigned s=0; s<fea.size(); s++) {
      *out << (binarytarget ? (fea[s].value > 0.0 ? 1 : 0) : fea[s].value);
      *out << " " << fea[s].fea << std::endl;
    }
  }
  FORCELINFO("Done Sample WFSAs");
  // Flowing off the end of a non-void function is undefined behaviour.
  return 0;
}