/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const Phrase& phrase = edge.GetCurrTargetPhrase(); if (reportAllFactors == true) { out << phrase; } else { size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); out << *factor; CHECK(factor); for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace option "-t" if (reportSegmentation == true && phrase.GetSize() > 0) { out << "|" << edge.GetCurrSourceWordsRange().GetStartPos() << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| "; } }
/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const Phrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().second; size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); if (placeholderFactor != NOT_FOUND) { const Factor *origFactor = phrase.GetFactor(pos, placeholderFactor); if (origFactor) { factor = origFactor; } } CHECK(factor); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { out << "UNK" << *factor; } else { out << *factor; } for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace option "-t" / "-tt" if (reportSegmentation > 0 && phrase.GetSize() > 0) { const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { out << ",0, "; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); OutputAlignment(out, ai, 0, 0); } out << "| "; } }
/// add phrase alignment information from a Hypothesis void TranslationRequest:: add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const { if (!m_withAlignInfo) return; WordsRange const& trg = h.GetCurrTargetWordsRange(); WordsRange const& src = h.GetCurrSourceWordsRange(); std::map<std::string, xmlrpc_c::value> pAlnInfo; pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos()); pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos()); pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos()); aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo)); }
/*** * print surface factor only for the given phrase */ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder, char reportSegmentation, bool reportAllFactors) { CHECK(outputFactorOrder.size() > 0); const TargetPhrase& phrase = edge.GetCurrTargetPhrase(); bool markUnknown = StaticData::Instance().GetMarkUnknown(); if (reportAllFactors == true) { out << phrase; } else { FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor(); std::map<size_t, const Factor*> placeholders; if (placeholderFactor != NOT_FOUND) { // creates map of target position -> factor for placeholders placeholders = GetPlaceholders(edge, placeholderFactor); } size_t size = phrase.GetSize(); for (size_t pos = 0 ; pos < size ; pos++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); if (placeholders.size()) { // do placeholders std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos); if (iter != placeholders.end()) { factor = iter->second; } } CHECK(factor); //preface surface form with UNK if marking unknowns const Word &word = phrase.GetWord(pos); if(markUnknown && word.IsOOV()) { out << "UNK" << *factor; } else { out << *factor; } for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) { const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]); CHECK(factor); out << "|" << *factor; } out << " "; } } // trace ("report segmentation") option "-t" / "-tt" if (reportSegmentation > 0 && phrase.GetSize() > 0) { const WordsRange &sourceRange = edge.GetCurrSourceWordsRange(); const int sourceStart = sourceRange.GetStartPos(); const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { out << ",wa="; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); OutputAlignment(out, ai, 0, 0); out << ",total="; out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); OutputAllFeatureScores(scoreBreakdown, out); } out << "| "; } }