/**
 * Write an n-best list to 'out', one line per trellis path, in the standard
 * Moses format: "id ||| surface ||| feature scores ||| total score" with
 * optional segmentation, word-alignment, and input-path fields appended.
 *
 * @param out               destination stream (flushed once at the end)
 * @param nBestList         distinct trellis paths, best first
 * @param outputFactorOrder which target factors to print, in order
 * @param translationId     sentence id prefixed to every line
 * @param reportSegmentation trace level forwarded to OutputSurface ("-t"/"-tt")
 */
void OutputNBest(std::ostream& out,
                 const Moses::TrellisPathList &nBestList,
                 const std::vector<Moses::FactorType>& outputFactorOrder,
                 long translationId,
                 char reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  const bool includeSegmentation = staticData.NBestIncludesSegmentation();
  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  for (TrellisPathList::const_iterator pathIter = nBestList.begin();
       pathIter != nBestList.end(); ++pathIter) {
    const TrellisPath &path = **pathIter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // Surface string: edges are stored last-hypothesis-first, so walk backwards.
    out << translationId << " ||| ";
    for (int i = (int)edges.size() - 1; i >= 0; --i) {
      OutputSurface(out, *edges[i], outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // Per-feature scores, then the weighted total.
    OutputAllFeatureScores(path.GetScoreBreakdown(), out);
    out << " ||| " << path.GetTotalScore();

    // Optional phrase-to-phrase segmentation: "srcStart[-srcEnd]=tgtStart[-tgtEnd]".
    // The last edge (initial hypothesis) carries no phrase, hence size()-2.
    if (includeSegmentation) {
      out << " |||";
      for (int i = (int)edges.size() - 2; i >= 0; --i) {
        const Hypothesis &edge = *edges[i];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    // Optional word-level alignment points for each phrase pair.
    if (includeWordAlignment) {
      out << " ||| ";
      for (int i = (int)edges.size() - 2; i >= 0; --i) {
        const Hypothesis &edge = *edges[i];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    // Optional echo of the recovered input path.
    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }
    out << endl;
  }
  out << std::flush;
}
/**
 * Write an n-best list to 'out' (TranslationSystem-aware overload), one line
 * per trellis path: "id ||| surface ||| scores ||| total" with optional
 * segmentation, word-alignment, and recovered-input fields.
 *
 * In addition to OutputAllFeatureScores, this overload appends labeled
 * per-producer scores for the system's phrase and generation dictionaries.
 *
 * Fix: the path-recovery branch previously emitted "|||" without surrounding
 * spaces, unlike every other field separator (" ||| ") in both OutputNBest
 * overloads; downstream consumers split n-best lines on " ||| ", so the
 * separator is now written consistently.
 *
 * @param out                destination stream (flushed once at the end)
 * @param nBestList          distinct trellis paths, best first
 * @param outputFactorOrder  which target factors to print, in order
 * @param system             provides phrase/generation dictionaries for labeled scores
 * @param translationId      sentence id prefixed to every line
 * @param reportSegmentation trace flag forwarded to OutputSurface
 */
void OutputNBest(std::ostream& out,
                 const Moses::TrellisPathList &nBestList,
                 const std::vector<Moses::FactorType>& outputFactorOrder,
                 const TranslationSystem* system,
                 long translationId,
                 bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin(); iter != nBestList.end(); ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // Surface string: edges are stored last-hypothesis-first, so walk backwards.
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1; currEdge >= 0; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // Generic per-feature scores.
    OutputAllFeatureScores(out, system, path);

    string lastName;

    // Translation-model components, optionally labeled with the producer's
    // short weight name at each score-group boundary.
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for (size_t i = 0; i < pds.size(); i++) {
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(pds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // Generation-model components, same labeling scheme.
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for (size_t i = 0; i < gds.size(); i++) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(gds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // Weighted total score.
    out << " ||| " << path.GetTotalScore();

    // Optional phrase-to-phrase segmentation: "srcStart[-srcEnd]=tgtStart[-tgtEnd]".
    // The last edge (initial hypothesis) carries no phrase, hence size()-2.
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    // Optional word-level alignment points for each phrase pair.
    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2; currEdge >= 0; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    // Optional echo of the recovered input path.
    // Bug fix: separator was "|||" (no spaces), inconsistent with the rest of
    // the n-best format and with the other OutputNBest overload.
    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }
    out << endl;
  }
  out << std::flush;
}
/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); std::map<size_t, const Factor*> placeholders; if (placeholderFactor != NOT_FOUND) { // creates map of target position -> factor for placeholders placeholders = GetPlaceholders(edge, placeholderFactor); } size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); if (placeholders.size()) { // do placeholders std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); if (iter != placeholders.end()) { factor = iter->second; } } CHECK(factor); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { out << "UNK" << *factor; } else { out << *factor; } for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace ("report segmentation") option "-t" / "-tt" if (reportSegmentation > 0 && phrase.GetSize() > 0) { const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { out << ",wa="; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); OutputAlignment(out, ai, 0, 0); out << ",total="; out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); out << ","; 
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); OutputAllFeatureScores(scoreBreakdown, out); } out << "| "; } }
void Manager::OutputNBestList(OutputCollector *collector, const KBestExtractor::KBestVec &nBestList, long translationId) const { const StaticData &staticData = StaticData::Instance(); const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder(); std::ostringstream out; if (collector->OutputIsCout()) { // Set precision only if we're writing the n-best list to cout. This is to // preserve existing behaviour, but should probably be done either way. FixPrecision(out); } bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest(); bool PrintNBestTrees = staticData.PrintNBestTrees(); for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); p != nBestList.end(); ++p) { const KBestExtractor::Derivation &derivation = **p; // get the derivation's target-side yield Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation); // delete <s> and </s> UTIL_THROW_IF2(outputPhrase.GetSize() < 2, "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); // print the translation ID, surface factors, and scores out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; OutputAllFeatureScores(derivation.scoreBreakdown, out); out << " ||| " << derivation.score; // optionally, print word alignments if (includeWordAlignment) { out << " ||| "; Alignments align; OutputAlignmentNBest(align, derivation, 0); for (Alignments::const_iterator q = align.begin(); q != align.end(); ++q) { out << q->first << "-" << q->second << " "; } } // optionally, print tree if (PrintNBestTrees) { TreePointer tree = KBestExtractor::GetOutputTree(derivation); out << " ||| " << tree->GetString(); } out << std::endl; } assert(collector); collector->Write(translationId, out.str()); }