void Model1Feature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore) const { const Sentence& sentence = static_cast<const Sentence&>(input); float score = 0.0; float norm = TransformScore(1+sentence.GetSize()); for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) { const Word &wordT = targetPhrase.GetWord(posT); if ( !wordT.IsNonTerminal() ) { float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word // cache lookup bool foundInCache = false; { #ifdef WITH_THREADS boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock); #endif boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input); if (sentenceCache != m_cache.end()) { boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]); if (cacheHit != sentenceCache->second.end()) { foundInCache = true; score += cacheHit->second; FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl); } } } if (!foundInCache) { for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s> const Word &wordS = sentence.GetWord(posS); float modelProb = m_model1.GetProbability(wordS[0],wordT[0]); FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl); thisWordProb += modelProb; } float thisWordScore = TransformScore(thisWordProb) - norm; FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl); { #ifdef WITH_THREADS // need to update cache; write lock boost::unique_lock<boost::shared_mutex> lock(m_accessLock); #endif m_cache[&input][wordT[0]] = thisWordScore; } score += thisWordScore; } } } scoreBreakdown.PlusEquals(this, score); }
void Model1Feature::Load() { FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading source vocabulary from file " << m_fileNameVcbS << " ..."); Model1Vocabulary vcbS; vcbS.Load(m_fileNameVcbS); FEATUREVERBOSE2(2, " Done." << std::endl); FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading target vocabulary from file " << m_fileNameVcbT << " ..."); Model1Vocabulary vcbT; vcbT.Load(m_fileNameVcbT); FEATUREVERBOSE2(2, " Done." << std::endl); FEATUREVERBOSE(2, GetScoreProducerDescription() << ": Loading model 1 lexical translation table from file " << m_fileNameModel1 << " ..."); m_model1.Load(m_fileNameModel1,vcbS,vcbT); FEATUREVERBOSE2(2, " Done." << std::endl); FactorCollection &factorCollection = FactorCollection::Instance(); m_emptyWord = factorCollection.GetFactor(Model1Vocabulary::GIZANULL,false); UTIL_THROW_IF2(m_emptyWord==NULL, GetScoreProducerDescription() << ": Factor for GIZA empty word does not exist."); }
/**
 * Stores the decoder options and loads the optional word lists that
 * restrict the feature set.
 *
 * - No source path configured: nothing is loaded.
 * - m_domainTrigger set: every line of the source file becomes one entry of
 *   m_vocabDomain, holding the line's tab/space separated terms.
 * - Otherwise: the source and target files are read line by line into
 *   m_vocabSource and m_vocabTarget, and m_unrestricted is cleared.
 *
 * A stray "return;" at the top of the last case used to make the
 * restricted-vocabulary loading unreachable; it has been removed so the
 * configured word lists are actually applied.
 *
 * Raises through UTIL_THROW_IF2 when a word list file cannot be opened.
 *
 * @param opts  decoder options, stored in m_options
 */
void WordTranslationFeature::Load(AllOptions::ptr const& opts)
{
  m_options = opts;

  // load word list for restricted feature set
  if (m_filePathSource.empty()) {
    return;
  }

  FEATUREVERBOSE(1, "Loading word translation word lists from " << m_filePathSource << " and " << m_filePathTarget << std::endl);

  if (m_domainTrigger) {
    // domain trigger terms for each input document
    std::ifstream inFileSource(m_filePathSource.c_str());
    UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

    std::string line;
    while (std::getline(inFileSource, line)) {
      // one new term set per line of the file
      m_vocabDomain.resize(m_vocabDomain.size() + 1);
      std::vector<std::string> termVector;
      boost::split(termVector, line, boost::is_any_of("\t "));
      for (size_t i=0; i < termVector.size(); ++i) {
        m_vocabDomain.back().insert(termVector[i]);
      }
    }
    return; // the stream is closed by its destructor
  }

  // The source path is known to be non-empty here, so the restricted
  // vocabularies are always loaded in the non-domain-trigger case.
  std::string line;

  // restricted source word vocabulary
  std::ifstream inFileSource(m_filePathSource.c_str());
  UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
  while (std::getline(inFileSource, line)) {
    m_vocabSource.insert(line);
  }

  // restricted target word vocabulary
  std::ifstream inFileTarget(m_filePathTarget.c_str());
  UTIL_THROW_IF2(!inFileTarget, "could not open file " << m_filePathTarget);
  while (std::getline(inFileTarget, line)) {
    m_vocabTarget.insert(line);
  }

  m_unrestricted = false;
}