void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
                       char reportSegmentation, bool reportAllFactors)
{
  if (hypo != NULL) {
    // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
    OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
    OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
  }
}
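// Write the 1-best translation of a trellis path to the output stream.
// Edges are stored from last to first, so they are traversed in reverse order.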
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
                    char reportSegmentation, bool reportAllFactors, std::ostream &out)
{
  const std::vector<const Hypothesis *> &edges = path.GetEdges();

  for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
    const Hypothesis &edge = *edges[currEdge];
    OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
  }

  out << endl;
}
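// Chart-based variant: recurses over the hypothesis' predecessors; the actual
// surface output of the current target phrase is currently commented out.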
void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
                   bool reportSegmentation, bool reportAllFactors)
{
  if (hypo != NULL) {
    //OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);

    const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
    vector<const ChartHypothesis*>::const_iterator iter;
    for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
      const ChartHypothesis *prevHypo = *iter;
      OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
  }
}
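// Write an n-best list in the standard Moses format:
//   translationId ||| surface ||| feature scores ||| total score
//   [ ||| segmentation] [ ||| word alignment] [ ||| recovered input path]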
void OutputNBest(std::ostream& out
                 , const Moses::TrellisPathList &nBestList
                 , const std::vector<Moses::FactorType>& outputFactorOrder
                 , long translationId
                 , char reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores(path.GetScoreBreakdown(), out);

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
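// Chart decoder n-best output: prints each derivation's target yield, then the
// scores in a hardwired, optionally labeled order (language models, sparse
// stateful features, translation model components, word penalty, generation
// models, sparse stateless features), then the total score.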
void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const ChartHypothesis *bestHypo,
                                const TranslationSystem* system, long translationId)
{
  std::ostringstream out;

  // Check if we're writing to std::cout.
  if (m_nBestOutputCollector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout. This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapper::FixPrecision(out);

    // The output from -output-hypo-score is always written to std::cout.
    if (StaticData::Instance().GetOutputHypoScore()) {
      if (bestHypo != NULL) {
        out << bestHypo->GetTotalScore() << " ";
      } else {
        out << "0 ";
      }
    }
  }

  bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
  //bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();

  ChartTrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const ChartTrellisPath &path = **iter;
    //cerr << path << endl << endl;

    Moses::Phrase outputPhrase = path.GetOutputPhrase();

    // delete 1st & last
    CHECK(outputPhrase.GetSize() >= 2);
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the surface factor of the translation
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";

    // print the scores in a hardwired order
    // before each model type, the corresponding command-line-like name must be emitted
    // MERT script relies on this

    // lm
    const LMList& lml = system->GetLanguageModels();
    if (lml.size() > 0) {
      if (labeledOutput)
        out << "lm:";
      LMList::const_iterator lmi = lml.begin();
      for (; lmi != lml.end(); ++lmi) {
        out << " " << path.GetScoreBreakdown().GetScoreForProducer(*lmi);
      }
    }

    std::string lastName = "";

    // output stateful sparse features
    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
    for (size_t i = 0; i < sff.size(); i++)
      if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
        OutputSparseFeatureScores(out, path, sff[i], lastName);

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for (size_t i = 0; i < pds.size(); i++) {
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(pds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // word penalty
    if (labeledOutput)
      out << " w:";
    out << " " << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer());

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for (size_t i = 0; i < gds.size(); i++) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(gds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // output stateless sparse features
    lastName = "";
    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
    for (size_t i = 0; i < slf.size(); i++) {
      if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited) {
        OutputSparseFeatureScores(out, path, slf[i], lastName);
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    /*
    if (includeAlignment) {
      *m_nBestStream << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const ChartHypothesis &edge = *edges[currEdge];
        WordsRange sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = edge.GetCurrTargetWordsRange();
        *m_nBestStream << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          *m_nBestStream << "-" << sourceRange.GetEndPos();
        }
        *m_nBestStream << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          *m_nBestStream << "-" << targetRange.GetEndPos();
        }
      }
    }
    */

    out << endl;
  }

  out << std::flush;

  CHECK(m_nBestOutputCollector);
  m_nBestOutputCollector->Write(translationId, out.str());
}
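// Phrase-based n-best output (TranslationSystem variant): surface string, feature
// scores (with labeled translation-model and generation components), total score,
// and optional segmentation, word alignment, and recovered input path.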
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList,
                 const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system,
                 long translationId, bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores(out, system, path);

    string lastName;

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for (size_t i = 0; i < pds.size(); i++) {
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(pds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for (size_t i = 0; i < gds.size(); i++) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(gds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
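// Syntax/chart Manager n-best output: strips <s> and </s> from each derivation's
// target yield, then writes surface, feature scores, total score, and optionally
// word alignments and the derivation tree.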
void Manager::OutputNBestList(OutputCollector *collector, const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  const StaticData &staticData = StaticData::Instance();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout. This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  bool PrintNBestTrees = staticData.PrintNBestTrees();

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end(); ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (PrintNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    out << std::endl;
  }

  assert(collector);
  collector->Write(translationId, out.str());
}