/**
 * Render the word alignment of a chain of hypotheses into a string and hand
 * it to the collector under the given line number.
 *
 * The text is produced by the stream overload OutputAlignment(out, edges).
 * A null collector is treated as "nothing to write to": the call returns
 * without doing any work, which is how the collector overloads taking a
 * Hypothesis or a TrellisPath already behave.
 *
 * \param collector destination for the finished line; may be null
 * \param lineNo    line number passed through to collector->Write()
 * \param edges     hypotheses whose alignments are printed
 */
void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
{
  // Without this guard a null collector would be dereferenced below.
  if (!collector) {
    return;
  }

  ostringstream out;
  OutputAlignment(out, edges);
  collector->Write(lineNo, out.str());
}
/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const Phrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().second; size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); if (placeholderFactor != NOT_FOUND) { const Factor *origFactor = phrase.GetFactor(pos, placeholderFactor); if (origFactor) { factor = origFactor; } } CHECK(factor); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { out << "UNK" << *factor; } else { out << *factor; } for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace option "-t" / "-tt" if (reportSegmentation > 0 && phrase.GetSize() > 0) { const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { out << ",0, "; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); OutputAlignment(out, ai, 0, 0); } out << "| "; } }
/**
 * Print the word alignment for the derivation ending in hypo.
 *
 * Follows the GetPrevHypo() links from hypo until they run out, collecting
 * the hypotheses newest-first, and passes the list to the edge-vector
 * overload, which does the actual printing.
 */
void OutputAlignment(std::ostream &out, const Moses::Hypothesis *hypo)
{
  std::vector<const Hypothesis *> chain;
  for (const Hypothesis *node = hypo; node; node = node->GetPrevHypo()) {
    chain.push_back(node);
  }

  OutputAlignment(out, chain);
}
/**
 * Send the word alignment for the derivation ending in hypo to a collector.
 *
 * Does nothing when collector is null. Otherwise the hypotheses reachable
 * through GetPrevHypo() are gathered newest-first and forwarded, together
 * with lineNo, to the collector overload that takes an edge vector.
 */
void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
{
  if (!collector) {
    return;
  }

  std::vector<const Hypothesis *> chain;
  for (const Hypothesis *node = hypo; node; node = node->GetPrevHypo()) {
    chain.push_back(node);
  }

  OutputAlignment(collector, lineNo, chain);
}
/**
 * Print the word alignments of a list of hypotheses on one line.
 *
 * edges is walked from its last element to its first. Each hypothesis
 * contributes the alignment of its target phrase, shifted by the start of its
 * source range and by the number of target words emitted so far. The line is
 * terminated with std::endl.
 */
void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
  size_t targetWordsSoFar = 0;

  for (vector<const Hypothesis *>::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
    const Hypothesis &hypothesis = **it;
    const TargetPhrase &targetPhrase = hypothesis.GetCurrTargetPhrase();
    const size_t sourceStart = hypothesis.GetCurrSourceWordsRange().GetStartPos();

    OutputAlignment(out, targetPhrase.GetAlignTerm(), sourceStart, targetWordsSoFar);

    targetWordsSoFar += targetPhrase.GetSize();
  }

  out << std::endl;
}
/**
 * Write an n-best list, one line per path.
 *
 * Each line has the form
 *   translationId ||| surface ||| feature scores ||| total score
 * optionally followed by " |||" plus the phrase-to-phrase segmentation,
 * " ||| " plus the word alignment, and " ||| " plus the output of
 * OutputInput(), depending on the StaticData settings read below.
 * The stream is flushed after every line (std::endl) and once more at the end.
 */
void OutputNBest(std::ostream& out
                 , const Moses::TrellisPathList &nBestList
                 , const std::vector<Moses::FactorType>& outputFactorOrder
                 , long translationId
                 , char reportSegmentation)
{
  const StaticData &config = StaticData::Instance();
  const bool allFactors = config.GetReportAllFactorsNBest();
  const bool withSegmentation = config.NBestIncludesSegmentation();
  const bool withWordAlignment = config.PrintAlignmentInfoInNbest();

  for (TrellisPathList::const_iterator pathIt = nBestList.begin(); pathIt != nBestList.end(); ++pathIt) {
    const TrellisPath &path = **pathIt;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // Edges are visited from the back of the vector to the front throughout.
    const int lastEdge = static_cast<int>(edges.size()) - 1;

    // surface form of the translation
    out << translationId << " ||| ";
    for (int e = lastEdge; e >= 0; --e) {
      OutputSurface(out, *edges[e], outputFactorOrder, reportSegmentation, allFactors);
    }
    out << " |||";

    // feature scores, labelled with their names
    OutputAllFeatureScores(path.GetScoreBreakdown(), out );

    // overall score
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation; the last element of edges is skipped
    if (withSegmentation) {
      out << " |||";
      for (int e = lastEdge - 1; e >= 0; --e) {
        const Hypothesis &hypothesis = *edges[e];
        const WordsRange &srcSpan = hypothesis.GetCurrSourceWordsRange();
        WordsRange tgtSpan = path.GetTargetWordsRange(hypothesis);

        // single-word spans are written as one number, longer ones as "a-b"
        out << " " << srcSpan.GetStartPos();
        if (srcSpan.GetStartPos() < srcSpan.GetEndPos()) {
          out << "-" << srcSpan.GetEndPos();
        }
        out << "=" << tgtSpan.GetStartPos();
        if (tgtSpan.GetStartPos() < tgtSpan.GetEndPos()) {
          out << "-" << tgtSpan.GetEndPos();
        }
      }
    }

    // word alignment, offset by each phrase's source and target start
    if (withWordAlignment) {
      out << " ||| ";
      for (int e = lastEdge - 1; e >= 0; --e) {
        const Hypothesis &hypothesis = *edges[e];
        const WordsRange &srcSpan = hypothesis.GetCurrSourceWordsRange();
        WordsRange tgtSpan = path.GetTargetWordsRange(hypothesis);
        const int srcShift = srcSpan.GetStartPos();
        const int tgtShift = tgtSpan.GetStartPos();
        const AlignmentInfo &alignment = hypothesis.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, alignment, srcShift, tgtShift);
      }
    }

    // input as printed by OutputInput() for the first edge
    if (config.IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << std::endl;
  }

  out << std::flush;
}
/**
 * Send the word alignment of a trellis path to a collector.
 *
 * A null collector makes this a no-op; otherwise the path's edges are
 * forwarded, with lineNo, to the collector overload taking an edge vector.
 */
void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
{
  if (!collector) {
    return;
  }

  OutputAlignment(collector, lineNo, path.GetEdges());
}
/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); std::map<size_t, const Factor*> placeholders; if (placeholderFactor != NOT_FOUND) { // creates map of target position -> factor for placeholders placeholders = GetPlaceholders(edge, placeholderFactor); } size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); if (placeholders.size()) { // do placeholders std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); if (iter != placeholders.end()) { factor = iter->second; } } CHECK(factor); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { out << "UNK" << *factor; } else { out << *factor; } for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace ("report segmentation") option "-t" / "-tt" if (reportSegmentation > 0 && phrase.GetSize() > 0) { const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { out << ",wa="; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); OutputAlignment(out, ai, 0, 0); out << ",total="; out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); out << ","; 
ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); OutputAllFeatureScores(scoreBreakdown, out); } out << "| "; } }
/**
 * Write an n-best list for one translation system, one line per path.
 *
 * Each line has the form
 *   translationId ||| surface ||| scores ||| total score
 * where "scores" is the output of OutputAllFeatureScores() followed by the
 * phrase-dictionary and generation-dictionary scores. Depending on the
 * StaticData settings read below, the line continues with " |||" plus the
 * phrase-to-phrase segmentation, " ||| " plus the word alignment, and
 * " ||| " plus the output of OutputInput().
 *
 * The stream is flushed after every line (endl) and once more at the end.
 *
 * \param out                destination stream
 * \param nBestList          paths to print
 * \param outputFactorOrder  factors to print for each target word
 * \param system             supplies the phrase and generation dictionaries
 * \param translationId      id written at the start of every line
 * \param reportSegmentation forwarded to OutputSurface()
 */
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation; edges are visited from
    // the back of the vector to the front
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores( out, system, path );
    string lastName;

    // translation components; with labeled output a name is written only for
    // the first dictionary, before score 0 and before score number
    // GetNumInputScores()
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    for( size_t i=0; i<pds.size(); i++ ) {
      size_t pd_numinputscore = pds[i]->GetNumInputScores();
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
      for (size_t j = 0; j<scores.size(); ++j) {
        if (labeledOutput && (i == 0) ) {
          if ((j == 0) || (j == pd_numinputscore)) {
            lastName = pds[i]->GetScoreProducerWeightShortName(j);
            out << " " << lastName << ":";
          }
        }
        out << " " << scores[j];
      }
    }

    // generation; labeled the same way as the translation components
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    for( size_t i=0; i<gds.size(); i++ ) {
      size_t pd_numinputscore = gds[i]->GetNumInputScores();
      vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
      for (size_t j = 0; j<scores.size(); ++j) {
        if (labeledOutput && (i == 0) ) {
          if ((j == 0) || (j == pd_numinputscore)) {
            lastName = gds[i]->GetScoreProducerWeightShortName(j);
            out << " " << lastName << ":";
          }
        }
        out << " " << scores[j];
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation; the last element of edges is skipped
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        // single-word spans are written as one number, longer ones as "a-b"
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }

    // word alignment, offset by each phrase's source and target start
    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    // Recovered input. Use the same space-padded " ||| " separator as every
    // other field on the line so that splitting on " ||| " keeps this field
    // apart from the one before it.
    if (staticData.IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}