void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source, const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator, const AlignmentInfo &alignmentInfo) const { // handle special case: unknown words (they have no word alignment) size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = source.GetSize(); if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return; // flag aligned words bool aligned[16]; CHECK(sourceLength < 16); for(size_t i=0; i<sourceLength; i++) aligned[i] = false; for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) aligned[ alignmentPoint->first ] = true; // process unaligned source words for(size_t i=0; i<sourceLength; i++) { if (!aligned[i]) { const Word &w = source.GetWord(i); if (!w.IsNonTerminal()) { const StringPiece word = w.GetFactor(m_factorType)->GetString(); if (word != "<s>" && word != "</s>") { if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) { accumulator->PlusEquals(this, StringPiece("OTHER"),1); } else { accumulator->PlusEquals(this,word,1); } } } } } }
void TargetWordInsertionFeature::ComputeFeatures(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator, const AlignmentInfo &alignmentInfo) const { // handle special case: unknown words (they have no word alignment) size_t targetLength = targetPhrase.GetSize(); size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize(); if (targetLength == 1 && sourceLength == 1) { const Factor* f1 = targetPhrase.GetWord(0).GetFactor(1); if (f1 && f1->GetString().compare(UNKNOWN_FACTOR) == 0) { return; } } // flag aligned words bool aligned[16]; CHECK(targetLength < 16); for(size_t i=0; i<targetLength; i++) { aligned[i] = false; } for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++) { aligned[ alignmentPoint->second ] = true; } // process unaligned target words for(size_t i=0; i<targetLength; i++) { if (!aligned[i]) { Word w = targetPhrase.GetWord(i); if (!w.IsNonTerminal()) { const string &word = w.GetFactor(m_factorType)->GetString(); if (word != "<s>" && word != "</s>") { if (!m_unrestricted && m_vocab.find( word ) == m_vocab.end()) { accumulator->PlusEquals(this,"OTHER",1); } else { accumulator->PlusEquals(this,word,1); } } } } } }
/**
 * Collects, per word position, the positions it is aligned to on the other
 * side of a phrase pair.
 *
 * @param phraseS   source phrase
 * @param phraseT   target phrase
 * @param alignment alignment points (source index, target index)
 * @return pair whose first element holds, for every target position, the set
 *         of aligned source positions, and whose second element holds, for
 *         every source position, the set of aligned target positions
 * @throws AlignmentException when an alignment point lies outside either
 *         phrase; a diagnostic is written to cerr first
 */
pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector>
PhraseDictionaryMultiModelCounts::GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const
{
  const size_t targetLen = phraseT.GetSize();
  const size_t sourceLen = phraseS.GetSize();

  // one (initially empty) set per word position on each side
  AlignVector sourcesOfTarget(targetLen);
  AlignVector targetsOfSource(sourceLen);

  for (AlignmentInfo::const_iterator point = alignment.begin();
       point != alignment.end(); ++point) {
    const size_t srcPos = point->first;
    const size_t tgtPos = point->second;

    const bool withinPhrases = (srcPos < sourceLen) && (tgtPos < targetLen);
    if (!withinPhrases) {
      cerr << "Error: inconsistent alignment for phrase pair: " << phraseS << " - " << phraseT << endl;
      cerr << "phrase pair will be discarded" << endl;
      throw AlignmentException();
    }

    // record the link in both directions
    sourcesOfTarget[tgtPos].insert(srcPos);
    targetsOfSource[srcPos].insert(tgtPos);
  }

  return make_pair(sourcesOfTarget, targetsOfSource);
}
/**
 * Writes an n-best list to @p out, one line per path, then flushes the stream.
 *
 * Each line consists of " ||| "-separated fields:
 *   - the translation id,
 *   - the surface form (edges are walked from last index to first),
 *   - the feature scores: stateful feature functions, stateless feature
 *     functions, translation model scores and generation scores, each group
 *     prefixed by a label when labeled output is enabled,
 *   - the total path score,
 *   - optionally the phrase-to-phrase alignment,
 *   - optionally the word alignment,
 *   - optionally the input, when path recovery is enabled.
 *
 * @param out               stream to write to
 * @param nBestList         paths to print
 * @param outputFactorOrder factors passed on to OutputSurface
 * @param system            supplies the feature functions and dictionaries
 * @param translationId     id printed at the start of every line
 */
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
{
  // configuration is read once up front instead of once per path
  const StaticData &staticData = StaticData::Instance();
  const bool labeledOutput = staticData.IsLabeledNBestList();
  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  const bool includeAlignment = staticData.NBestIncludesAlignment();
  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  const bool sentenceInput = (staticData.GetInputType() == SentenceInput);
  const bool pathRecoveryEnabled = staticData.IsPathRecoveryEnabled();

  for (TrellisPathList::const_iterator pathIter = nBestList.begin();
       pathIter != nBestList.end(); ++pathIter) {
    const TrellisPath &path = **pathIter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = static_cast<int>(edges.size()) - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
    }
    out << " |||";

    // a label is printed only when the short name differs from the previous
    // producer's; lastName carries over from the stateful to the stateless list
    std::string lastName = "";

    // stateful feature functions
    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
    for (size_t i = 0; i < sff.size(); i++) {
      if (labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName()) {
        lastName = sff[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      const vector<float>& scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    // stateless feature functions
    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
    for (size_t i = 0; i < slf.size(); i++) {
      if (labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName()) {
        lastName = slf[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      const vector<float>& scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    // translation components (bound by reference: the list is only read)
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (sentenceInput) {
      // translation components for text input
      if (pds.size() > 0) {
        if (labeledOutput)
          out << " tm:";
        for (vector<PhraseDictionaryFeature*>::const_iterator pdIter = pds.begin();
             pdIter != pds.end(); ++pdIter) {
          const vector<float>& scores = path.GetScoreBreakdown().GetScoresForProducer(*pdIter);
          for (size_t j = 0; j < scores.size(); ++j)
            out << " " << scores[j];
        }
      }
    } else {
      // translation components for Confusion Network input
      // first translation component has GetNumInputScores() scores from the input Confusion Network
      // at the beginning of the vector
      if (pds.size() > 0) {
        const vector<PhraseDictionaryFeature*>::const_iterator firstPd = pds.begin();
        const vector<float>& inputScores = path.GetScoreBreakdown().GetScoresForProducer(*firstPd);
        const size_t firstNumInputScores = (*firstPd)->GetNumInputScores();

        if (firstNumInputScores) {
          if (labeledOutput)
            out << " I:";
          for (size_t j = 0; j < firstNumInputScores; ++j)
            out << " " << inputScores[j];
        }

        for (vector<PhraseDictionaryFeature*>::const_iterator pdIter = pds.begin();
             pdIter != pds.end(); ++pdIter) {
          const vector<float>& scores = path.GetScoreBreakdown().GetScoresForProducer(*pdIter);
          const size_t numInputScores = (*pdIter)->GetNumInputScores();

          if (pdIter == pds.begin() && labeledOutput)
            out << " tm:";
          // each dictionary's leading input scores are skipped here
          for (size_t j = numInputScores; j < scores.size(); ++j)
            out << " " << scores[j];
        }
      }
    }

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      if (labeledOutput)
        out << " g: ";
      for (vector<GenerationDictionary*>::const_iterator gdIter = gds.begin();
           gdIter != gds.end(); ++gdIter) {
        const vector<float>& scores = path.GetScoreBreakdown().GetScoresForProducer(*gdIter);
        // note: unlike the other groups, the separator follows each score
        for (size_t j = 0; j < scores.size(); j++) {
          out << scores[j] << " ";
        }
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase alignment; iteration starts at size()-2, so the edge
    // at the highest index is not reported
    if (includeAlignment) {
      out << " |||";
      for (int currEdge = static_cast<int>(edges.size()) - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    // word alignment, with phrase-internal positions shifted by the start
    // positions of the edge's source and target ranges
    if (includeWordAlignment) {
      out << " |||";
      for (int currEdge = static_cast<int>(edges.size()) - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        // bound by reference: copying the alignment for every edge is wasted work
        const AlignmentInfo &alignInfo = edge.GetCurrTargetPhrase().GetAlignmentInfo();

        for (AlignmentInfo::const_iterator alignIter = alignInfo.begin();
             alignIter != alignInfo.end(); ++alignIter) {
          out << " " << alignIter->first+sourceOffset << "-" << alignIter->second+targetOffset;
        }
      }
    }

    if (pathRecoveryEnabled) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}