Beispiel #1
0
/**
 * Loads the IRST language model stored at m_filePath and prepares this
 * object for use.
 *
 * In order: creates the model object, optionally caps the loaded level,
 * loads the file, fetches the dictionary, records the n-gram order, sets the
 * unknown/empty codes, creates the factors, initialises the caches and, when
 * m_lmtb_dub is positive, passes it to setlogOOVpenalty().
 *
 * @param opts not referenced by this implementation.
 */
void LanguageModelIRST::Load(AllOptions::ptr const& opts)
{
  FactorCollection &factors = FactorCollection::Instance();

  m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
  if (m_lmtb_size > 0) {
    // only a positive, previously configured size is forwarded as a level cap
    m_lmtb->setMaxLoadedLevel(m_lmtb_size);
  }
  m_lmtb->load(m_filePath);

  d = m_lmtb->getDict();
  d->incflag(1); // NOTE(review): presumably lets the dictionary grow — confirm against IRSTLM

  // from here on both the stored size and the n-gram order mirror the model's maxlevel()
  m_lmtb_size = m_lmtb->maxlevel();
  m_nGramOrder = m_lmtb_size;

  // LM can be ok, just outputs warnings
  // Mauro: in the original, the following two instructions are wrongly switched:
  m_unknownId = d->oovcode(); // at the level of micro tags
  m_empty = -1; // code for an empty position

  CreateFactors(factors);

  VERBOSE(1, GetScoreProducerDescription() << "  LanguageModelIRST::Load() m_unknownId=" << m_unknownId << std::endl);

  //install caches to save time (only if PS_CACHE_ENABLE is defined through compilation flags)
  if (m_lmtb_size > 2) {
    m_lmtb->init_caches(m_lmtb_size - 1);
  } else {
    // never pass a value below 2
    m_lmtb->init_caches(2);
  }

  if (m_lmtb_dub > 0) {
    m_lmtb->setlogOOVpenalty(m_lmtb_dub);
  }
}
/**
 * Builds the feature from its configuration line.
 *
 * The base class receives 2 as its first argument (presumably the number of
 * score components — confirm against StatelessFeatureFunction). Progress is
 * written unconditionally to std::cerr around the call to ReadParameters().
 *
 * @param line configuration line forwarded to the base class.
 */
SourceGHKMTreeInputMatchFeature::SourceGHKMTreeInputMatchFeature(const std::string &line)
  : StatelessFeatureFunction(2, line)
{
  std::ostream &err = std::cerr;

  // NOTE(review): no separator is emitted between the description and the message text.
  err << GetScoreProducerDescription();
  err << "Initializing feature...";

  this->ReadParameters();

  err << " Done.";
  err << std::endl;
}
Beispiel #3
0
/**
 * Builds the feature from its configuration line.
 *
 * Logs at verbosity level 1 before and after reading the parameters; the
 * closing " Done." message is emitted without a trailing newline.
 *
 * @param line configuration line forwarded to StatelessFeatureFunction.
 */
Model1Feature::Model1Feature(const std::string &line)
  : StatelessFeatureFunction(1, line)
{
  VERBOSE(1, "Initializing feature " << this->GetScoreProducerDescription() << " ...");

  this->ReadParameters();

  VERBOSE(1, " Done.");
}
/**
 * Applies one key/value configuration pair to this feature function.
 *
 * Recognised keys: "tuneable", "tuneable-components",
 * "require-sorting-after-source-context", "verbosity" and "filterable"
 * (accepted but ignored). Any other key is reported through UTIL_THROW2.
 *
 * @param key   name of the parameter.
 * @param value textual value, parsed according to the key.
 */
void FeatureFunction::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "tuneable") {
    m_tuneable = Scan<bool>(value);
    return;
  }

  if (key == "tuneable-components") {
    // per-component flags are rejected when the feature as a whole is not tuneable
    UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
                   << ": tuneable-components cannot be set if tuneable=false");
    SetTuneableComponents(value);
    return;
  }

  if (key == "require-sorting-after-source-context") {
    m_requireSortingAfterSourceContext = Scan<bool>(value);
    return;
  }

  if (key == "verbosity") {
    m_verbosity = Scan<size_t>(value);
    return;
  }

  if (key == "filterable") {
    // known key, deliberately ignored here
    return;
  }

  UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value);
}
Beispiel #5
0
/**
 * Builds the IRST language model feature from its configuration line.
 *
 * Refuses to be constructed unless exactly one thread is configured, then
 * reads the parameters and logs the resulting settings at verbosity level 4.
 *
 * @param line configuration line forwarded to LanguageModelSingleFactor.
 * @throws runtime_error when StaticData reports a thread count other than 1.
 */
LanguageModelIRST::LanguageModelIRST(const std::string &line)
  :LanguageModelSingleFactor(line)
  ,m_lmtb_dub(0)
  ,m_lmtb_size(0)
{
  // the check runs before any parameter is read
  const int numThreads = StaticData::Instance().ThreadCount();
  if (numThreads != 1) {
    const std::string message = "Error: " + SPrint(numThreads) + " number of threads specified but IRST LM is not threadsafe.";
    throw runtime_error(message);
  }

  this->ReadParameters();

  // dump the configuration that resulted from ReadParameters()
  VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_dub:|" << m_lmtb_dub << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_filePath:|" << m_filePath << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_factorType:|" << m_factorType << "|" << std::endl);
  VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl);
}
Beispiel #6
0
void Model1Feature::Load()
{
  FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading source vocabulary from file " << m_fileNameVcbS << " ...");
  Model1Vocabulary vcbS;
  vcbS.Load(m_fileNameVcbS);
  FEATUREVERBOSE2(2, " Done." << std::endl);
  FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading target vocabulary from file " << m_fileNameVcbT << " ...");
  Model1Vocabulary vcbT;
  vcbT.Load(m_fileNameVcbT);
  FEATUREVERBOSE2(2, " Done." << std::endl);
  FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading model 1 lexical translation table from file " << m_fileNameModel1 << " ...");
  m_model1.Load(m_fileNameModel1,vcbS,vcbT);
  FEATUREVERBOSE2(2, " Done." << std::endl);
  FactorCollection &factorCollection = FactorCollection::Instance();
  m_emptyWord = factorCollection.GetFactor(Model1Vocabulary::GIZANULL,false);
  UTIL_THROW_IF2(m_emptyWord==NULL, GetScoreProducerDescription()
                 << ": Factor for GIZA empty word does not exist.");
}
/**
 * Parses the per-component tuneable flags.
 *
 * The value must be a comma-separated list holding exactly
 * m_numScoreComponents boolean entries; an empty list or a list of the wrong
 * length is reported through UTIL_THROW_IF2. On success m_tuneableComponents
 * holds the parsed flags and m_numTuneableComponents counts the entries that
 * parsed as true.
 *
 * @param value comma-separated boolean list.
 */
void FeatureFunction::SetTuneableComponents(const std::string& value)
{
  const std::vector<std::string> flags = Tokenize(value,",");

  UTIL_THROW_IF2(flags.empty(), GetScoreProducerDescription()
                 << ": Empty tuneable-components");
  UTIL_THROW_IF2(flags.size()!=m_numScoreComponents, GetScoreProducerDescription()
                 << ": tuneable-components value has to be a comma-separated list of "
                 << m_numScoreComponents << " boolean values");

  m_tuneableComponents.resize(m_numScoreComponents);

  // start from "all tuneable" and subtract one for every disabled component
  m_numTuneableComponents = m_numScoreComponents;

  for (size_t idx = 0; idx < flags.size(); ++idx) {
    const bool isTuneable = Scan<bool>(flags[idx]);
    m_tuneableComponents[idx] = isTuneable;
    if (isTuneable) {
      continue;
    }
    --m_numTuneableComponents;
  }
}
/**
 * Builds the feature from its configuration line.
 *
 * Defaults: glue rules off, non-glue rules on, glue target LHS label "Q".
 * After the parameters are read, the label string is registered with the
 * factor collection and the resulting factor is stored in m_glueTargetLHS.
 *
 * @param line configuration line forwarded to StatelessFeatureFunction.
 */
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
    : StatelessFeatureFunction(1, line)
    , m_glueRules(false)
    , m_nonGlueRules(true)
    , m_glueTargetLHSStr("Q")
{
    VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");

    this->ReadParameters();

    // Done after ReadParameters(); presumably so that a configured label
    // replaces the default "Q" — confirm against this class's SetParameter.
    m_glueTargetLHS = FactorCollection::Instance().AddFactor(m_glueTargetLHSStr, true);

    VERBOSE(1, " Done.");
}
/**
 * Builds the word translation feature from its configuration line.
 *
 * Sets the defaults listed in the initialiser list, reads the parameters,
 * logs which modes are active (verbosity level 1) and, when punctuation is
 * to be ignored, fills m_punctuationHash from a fixed character list.
 *
 * @param line configuration line forwarded to StatelessFeatureFunction.
 */
WordTranslationFeature::WordTranslationFeature(const std::string &line)
  :StatelessFeatureFunction(0, line)
  ,m_unrestricted(true)
  ,m_simple(true)
  ,m_sourceContext(false)
  ,m_targetContext(false)
  ,m_domainTrigger(false)
  ,m_ignorePunctuation(false)
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
  this->ReadParameters();

  // report the active modes
  if (m_simple == 1) {
    VERBOSE(1, " Using simple word translations.");
  }
  if (m_sourceContext == 1) {
    VERBOSE(1, " Using source context.");
  }
  if (m_targetContext == 1) {
    VERBOSE(1, " Using target context.");
  }
  if (m_domainTrigger == 1) {
    VERBOSE(1, " Using domain triggers.");
  }

  // compile a list of punctuation characters
  if (m_ignorePunctuation) {
    VERBOSE(1, " Ignoring punctuation for triggers.");

    // NOTE(review): the list contains non-ASCII characters and is walked one
    // char at a time, so in a multi-byte source encoding each byte of such a
    // character becomes its own key — confirm this is intended.
    const char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
    const size_t count = sizeof(punctuation) - 1; // exclude the terminating NUL
    for (size_t pos = 0; pos != count; ++pos) {
      m_punctuationHash[punctuation[pos]] = 1;
    }
  }

  VERBOSE(1, " Done." << std::endl);

  // TODO not sure about this
  /*
  if (weight[0] != 1) {
    AddSparseProducer(wordTranslationFeature);
    VERBOSE(1, "wt sparse producer weight: " << weight[0] << std::endl);
    if (m_mira)
      m_metaFeatureProducer = new MetaFeatureProducer("wt");
  }

  if (m_parameter->GetParam("report-sparse-features").size() > 0) {
    wordTranslationFeature->SetSparseFeatureReporting();
  }
  */

}
/**
 * Rejects every parameter: this feature accepts no configuration keys.
 *
 * Always raises util::Exception through UTIL_THROW, naming the offending
 * key/value pair. NOTE(review): nothing is delegated to a base-class
 * SetParameter, so keys handled there are rejected here too — confirm
 * this is intended.
 *
 * @param key   name of the parameter (reported in the error message).
 * @param value value of the parameter (reported in the error message).
 */
void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const std::string& value)
{
  UTIL_THROW(util::Exception, GetScoreProducerDescription() << ": Unknown parameter " << key << "=" << value);
}