コード例 #1
ファイル: task.loadlm.hpp プロジェクト: ucam-smt/ucam-smt
 /**
  * \brief Private constructor with ucam::util::RegistryPO object and index to a particular language model.
  * This constructor is only used when several language models are loaded.
  * If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25),
  * the second and following instances of LoadLanguageModelTask will be created using this private constructor,
  * which has an index to the actual language model that must be loaded.
  * In the constructor itself the next language model loader task is appended.
  * \param rg         :  ucam::util::RegistryPO object, containing user parameters.
  * \param index      :  Index to the actual language model.
  * \param lmload     :  key word to access the registry object for language models
  * \param lmscale    :  key word to access the registry object for language model scales.
  * \param lmwp       :  key word to access the registry object for language model word penalties.
  * \param wordmapkey :  key word to look up the word map in the registry; if it is missing or empty the task is flagged as integer mapped.
  */
 // Constructor for the language model at position `index` of the `lmload`
 // registry entry: stores the registry and keys, reads the model file name for
 // that position, forwards the scale and word-penalty keys to their setters,
 // and appends another LoadLanguageModelTask while further models are listed.
 // NOTE(review): this excerpt has lost lines (the tail of the first LDEBUG
 // statement and the closing braces are not visible); code is left as found.
 LoadLanguageModelTask ( const ucam::util::RegistryPO& rg ,
                         uint index ,
                         const std::string& lmload = HifstConstants::kLmLoad,
                         const std::string& lmscale = HifstConstants::kLmFeatureweights ,
                         const std::string& lmwp = HifstConstants::kLmWordPenalty,
                         const std::string& wordmapkey = HifstConstants::kLmWordmap
                       ) :
   rg_ ( rg ),
   lmkey_ ( lmload ),
   previous_ ( "" ),
   built_ ( false ),
   index_ ( index ),
   // Flagged as integer mapped when the word-map key is absent from the
   // registry or its string value is empty.
   isintegermapped_ (!rg.exists (wordmapkey)
                     || rg.get<std::string> (wordmapkey) == ""),
   wordmapkey_ (wordmapkey),
   // Presumably the index-th entry of the lmload list -- confirm against
   // RegistryPO::getVectorString.
   lmfile_ ( rg.getVectorString ( lmload , index ) ) {
   // NOTE(review): the statement below is cut off after the trailing `<<`;
   // its final operand and closing `);` are missing from this excerpt.
   LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale <<
           ", and key " << lmkey_  << ",index=" << index_ << ",wordmapkey=" <<
   setLanguageModelScale ( lmscale );
   setLanguageModelWordPenalty ( lmwp );
   // More entries exist after this one: chain a loader for the next index,
   // which repeats this check in its own constructor.
   if ( rg.getVectorString ( lmload ).size() > index_ + 1 ) {
     LDEBUG ( "Appending Language model..." );
     // NOTE(review): raw `new`; ownership presumably passes to appendTask --
     // confirm.
     this->appendTask ( new LoadLanguageModelTask ( rg, index_ + 1, lmload, lmscale ,
                        lmwp , wordmapkey ) );
   LDEBUG ( "." );
コード例 #2
 ///Constructor with registry object, offset and keys
 // Initializes the members from the registry: the alignment-lattice setting
 // (empty when its key is absent), the grammar value stored under
 // `grammarloadkey`, and the key later used for storing.
 // NOTE(review): the default value of `alignmentlattices` and the constructor
 // body are missing from this excerpt; code is left as found.
 LoadSparseWeightFlowerLatticeTask ( const ucam::util::RegistryPO& rg,
                                     const unsigned offset =
                                       1, //minimum offset considering only one language model...
                                     const std::string& alignmentlattices =
                                     const std::string& grammarloadkey = HifstConstants::kRuleflowerlatticeLoad,
                                     const std::string& grammarstorekey = HifstConstants::kRuleflowerlatticeStore
                                   ) :
   offset_ ( offset ),
   rg_ ( rg ),
   // Registry string for the alignment-lattice key, or "" when the key does
   // not exist.
   alilats_ ( rg.exists ( alignmentlattices ) ? rg.get<std::string>
              ( alignmentlattices ) : "" ),
   grammar_ ( rg.get<std::string> ( grammarloadkey ) ),
   fscales_ ( fst::TropicalSparseTupleWeight<float>::Params() ),
   // True exactly when the alignment-lattice key exists in the registry
   // (even if its value is empty).
   filterbyalilats_ ( rg.exists ( alignmentlattices ) ),
   grammarstorekey_ ( grammarstorekey ) {
コード例 #3
ファイル: task.applylm.hpp プロジェクト: Libardo1/ucam-smt
 ///Constructor with ucam::util::RegistryPO object
 // Stores the language-model key, the lattice load/store keys and the
 // `deletelmscores` flag. In the visible initializer list the registry is only
 // queried for HifstConstants::kLmLogTen.
 // NOTE(review): the constructor body is not visible in this excerpt.
 ApplyLanguageModelTask ( const ucam::util::RegistryPO& rg ,
                          const std::string& lmkey = HifstConstants::kLmLoad ,
                          const std::string& latticeloadkey = "lm.lattice.load",
                          const std::string& latticestorekey = "lm.lattice.store",
                          bool deletelmscores = false
                        ) :
   lmkey_ ( lmkey ),
   latticeloadkey_ ( latticeloadkey ),
   latticestorekey_ ( latticestorekey ),
   // natlog_ is true when the kLmLogTen option is NOT present in the registry.
   natlog_ ( !rg.exists ( HifstConstants::kLmLogTen ) ),
   deletelmscores_ (deletelmscores) {
コード例 #4
ファイル: task.loadlm.hpp プロジェクト: ucam-smt/ucam-smt
 /**
  * \brief Public constructor. If the user wants to load several language models (e.g. --lm.load=lm1,lm2,lm3,lm4 and --lm.scale=0.25,0.25,0.25),
  * the second and following instances of LoadLanguageModelTask will be created using the private constructor (see below), which has an index to the actual language model that must be loaded.
  * For the public constructor, the index is set to 0.
  * \param rg          ucam::util::RegistryPO object, containing user parameters.
  * \param lmload      key word to access the registry object for language models
  * \param lmscale     key word to access the registry object for language model scales.
  * \param lmwp        key word to access the registry object for language model word penalties.
  * \param wordmapkey  key word to look up the word map in the registry; if it is missing or empty the task is flagged as integer mapped.
  * \param forceone    To force the loading of only one language model (i.e. lm1 with scale 0.25).
  */
 // Entry-point constructor: always handles the language model at index 0 of
 // the `lmload` registry entry. When more than one model is listed it either
 // appends a loader for index 1 or, if `forceone` is set, warns that the
 // extra models are ignored.
 // NOTE(review): this excerpt has lost lines (the `)` closing the parameter
 // list, the `{` opening the body, the tail of the first LDEBUG statement and
 // the closing braces are not visible); code is left as found.
 LoadLanguageModelTask ( const ucam::util::RegistryPO& rg
                         , const std::string& lmload = HifstConstants::kLmLoad
                         , const std::string& lmscale =
                         HifstConstants::kLmFeatureweights  //if rg.get(lmscale)=="" the scale will default to 1
                         , const std::string& lmwp =
                         HifstConstants::kLmWordPenalty  //if rg.get(wps)=="" the scale will default to 0
                         , const std::string& wordmapkey = HifstConstants::kLmWordmap
                         , bool forceone = false
     : rg_ ( rg )
     , lmkey_ ( lmload )
     , previous_ ( "" )
     , built_ ( false )
     , index_ ( 0 )
     // Flagged as integer mapped when the word-map key is absent from the
     // registry or its string value is empty.
     , isintegermapped_ (!rg.exists (wordmapkey)
                         || rg.get<std::string> (wordmapkey) == "")
     , wordmapkey_ (wordmapkey)
     // Presumably the first entry of the lmload list -- confirm against
     // RegistryPO::getVectorString.
     , lmfile_ ( rg.getVectorString ( lmload , 0 ) )
   // NOTE(review): the statement below is cut off after the trailing `<<`;
   // its final operand and closing `);` are missing from this excerpt.
   LDEBUG ( "LM loader using parameters " << lmload << "/" << lmscale << "/" << lmwp
           << ", and key " << lmkey_  << ",index=" << index_ << ",wordmap=" <<
   FORCELINFO("Language model loader for " << lmfile_() );
   setLanguageModelScale ( lmscale );
   setLanguageModelWordPenalty ( lmwp );
   // Several models requested: chain a loader for index 1 unless the caller
   // asked for exactly one model.
   if ( rg_.getVectorString ( lmload ).size() > 1 ) {
     if ( !forceone ) {
       LINFO ( "Appending Language model..." );
       // NOTE(review): raw `new`; ownership presumably passes to appendTask --
       // confirm.
       this->appendTask ( new LoadLanguageModelTask ( rg_, 1, lmload, lmscale , lmwp ,
                          wordmapkey ) );
     } else {
       LWARN ( "Only one loaded for " << lmload <<
               ". Extra language models are being ignored" );
   LINFO ( "Finished constructor!" );
コード例 #5
ファイル: samplehyps.main.cpp プロジェクト: ucam-smt/ucam-smt
// Reads sampling options from the registry, builds a BLEU scorer over the
// reference files, logs the set-level BLEU of the tune set, and then, for each
// cached lattice, extracts up to `n` shortest paths and writes one line per
// sampled feature (target value followed by the feature) to the output stream.
// NOTE(review): this excerpt has lost lines (closing braces, any `continue` /
// `return` statements, and presumably a template header -- `Arc` and `HypT`
// are not declared here and `typename Arc::Weight` suggests template
// parameters); code is left as found.
int SampleWFSAs( ucam::util::RegistryPO const& rg) {
  using ucam::util::oszfstream;
  using ucam::util::PatternAddress;
  // Input/output file-name patterns, invoked below with the lattice index.
  PatternAddress<unsigned> input(rg.get<std::string>(HifstConstants::kInput.c_str()));
  PatternAddress<unsigned> output(rg.get<std::string>(HifstConstants::kOutput.c_str()));
  unsigned n = rg.get<unsigned>(HifstConstants::kNbest.c_str());
  unsigned ns = rg.get<unsigned>(HifstConstants::kNSamples.c_str()); 
  float alpha = rg.get<float>(HifstConstants::kAlpha.c_str()); 
  // The next flags depend only on whether the option is present.
  bool negatives = rg.exists(HifstConstants::kNegativeExamples.c_str()); 
  bool binarytarget = rg.exists(HifstConstants::kBinaryTarget.c_str());
  // Negation is on unless the "don't negate" option is present.
  bool negate = !rg.exists(HifstConstants::kDontNegate.c_str());
  std::string extTok = rg.getString(HifstConstants::kExternalTokenizer.c_str());
  std::string wMap   = rg.getString(HifstConstants::kWordMap.c_str());
  //  std::string wMap = "";
  bool printOutputLabels = rg.exists(HifstConstants::kPrintOutputLabels.c_str());
  std::string refFiles;
  // NOTE(review): intRefs has no initializer and is only assigned inside the
  // two `if` bodies below; if neither key exists it appears to be passed to
  // BleuScorer uninitialized -- confirm against the complete file.
  bool intRefs;
  if (rg.exists(HifstConstants::kWordRefs)) {
    refFiles = rg.getString(HifstConstants::kWordRefs);    
    intRefs = false;
  // Checked after the word references, so integer references overwrite them
  // when both keys exist (assuming two independent `if`s -- braces missing).
  if (rg.exists(HifstConstants::kIntRefs)) {
    refFiles = rg.getString(HifstConstants::kIntRefs);
    intRefs = true;
  std::cerr << refFiles << "**" <<std::endl;
  ucam::fsttools::BleuScorer bleuScorer(refFiles, extTok, n, intRefs, wMap);
  ucam::fsttools::TuneSet< Arc > tuneSet(rg);
  ucam::fsttools::Bleu ibs = tuneSet.ComputeBleu(bleuScorer);
  FORCELINFO("Set level Bleu: " << ibs);
  // Seed defaults to the current time unless a random-seed option is given.
  // NOTE(review): in the visible lines the seed is only logged; no call that
  // consumes it appears in this excerpt.
  unsigned seed = time(NULL);
  if (rg.exists(HifstConstants::kRandomSeed.c_str()))
    seed = rg.get<unsigned>(HifstConstants::kRandomSeed.c_str());
  FORCELINFO("random seed: " << seed);
  boost::scoped_ptr<oszfstream> out;
  // Name of the output file currently open; used to reopen only on change.
  std::string old;
  for (unsigned i=0; i<tuneSet.cachedLats.size(); i++) {
    // Work on a copy of the cached lattice; nfst receives the n-best paths.
    fst::VectorFst<Arc> ifst(*tuneSet.cachedLats[i]);
    fst::VectorFst<Arc> nfst;
    // Open a new output stream only when the output name for this index
    // differs from the one used for the previous index.
    if (old != output (i) ) {
      out.reset(new oszfstream (output(i)));
      old = output(i);
    // Empty lattice: logged here; what follows the log (e.g. skipping the
    // iteration) is not visible in this excerpt.
    if (!ifst.NumStates() ) {
      FORCELINFO("EMPTY: " << i);
    // Projecting allows unique to work for all cases.
    fst::Project(&ifst, (printOutputLabels?PROJECT_OUTPUT:PROJECT_INPUT));
    ShortestPath (ifst, &nfst, n, true );
    std::vector<HypT> hyps;
    fst::printStrings<Arc> (nfst, &hyps);
    std::vector< LabeledFeature< float, typename Arc::Weight> > fea = 
      ProSBLEUSample<typename Arc::Weight, HypT>(bleuScorer, hyps, i, n, ns, alpha, negatives, negate);
    // One output line per sample: the target (1/0 when binary targets are
    // requested, the raw value otherwise), a space, then the feature.
    for (unsigned s=0; s<fea.size(); s++) {
      *out << (binarytarget ? (fea[s].value > 0.0 ? 1 : 0) : fea[s].value);
      *out << " " << fea[s].fea << std::endl;
  FORCELINFO("Done Sample WFSAs");