Beispiel #1
0
/**
 * Write every path of an n-best list to `out`, one line per path.
 *
 * Fields on a line are separated by "|||": the translation id, the surface
 * output of all edges, the feature scores and the total score. Depending on
 * the StaticData settings the phrase segmentation, the word alignment and the
 * input (path recovery) are appended as further fields.
 *
 * @param out                stream receiving the n-best list
 * @param nBestList          paths to print
 * @param outputFactorOrder  forwarded to OutputSurface
 * @param translationId      id printed as the first field of every line
 * @param reportSegmentation forwarded to OutputSurface
 */
void OutputNBest(std::ostream& out
                 , const Moses::TrellisPathList &nBestList
                 , const std::vector<Moses::FactorType>& outputFactorOrder
                 , long translationId
                 , char reportSegmentation)
{
  const StaticData &config = StaticData::Instance();
  const bool allFactors = config.GetReportAllFactorsNBest();
  const bool withSegmentation = config.NBestIncludesSegmentation();
  const bool withWordAlignment = config.PrintAlignmentInfoInNbest();

  for (TrellisPathList::const_iterator pathIt = nBestList.begin();
       pathIt != nBestList.end(); ++pathIt) {
    const TrellisPath &path = **pathIt;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();
    const int numEdges = static_cast<int>(edges.size());

    // field 1 + 2: id and surface form; edges are walked from the last index
    // down to index 0
    out << translationId << " ||| ";
    int idx = numEdges - 1;
    while (idx >= 0) {
      OutputSurface(out, *edges[idx], outputFactorOrder, reportSegmentation, allFactors);
      --idx;
    }
    out << " |||";

    // field 3: scores with feature names
    OutputAllFeatureScores(path.GetScoreBreakdown(), out );

    // field 4: total score
    out << " ||| " << path.GetTotalScore();

    // optional: phrase-to-phrase segmentation, "src[-srcEnd]=tgt[-tgtEnd]".
    // The edge at the highest index is skipped here.
    // NOTE(review): presumably that edge is the empty initial hypothesis -- confirm.
    if (withSegmentation) {
      out << " |||";
      idx = numEdges - 2;
      while (idx >= 0) {
        const Hypothesis &hypo = *edges[idx];
        const WordsRange &srcSpan = hypo.GetCurrSourceWordsRange();
        WordsRange tgtSpan = path.GetTargetWordsRange(hypo);

        out << " " << srcSpan.GetStartPos();
        // the end position is only printed for spans longer than one word
        if (srcSpan.GetStartPos() < srcSpan.GetEndPos()) {
          out << "-" << srcSpan.GetEndPos();
        }
        out << "=" << tgtSpan.GetStartPos();
        if (tgtSpan.GetStartPos() < tgtSpan.GetEndPos()) {
          out << "-" << tgtSpan.GetEndPos();
        }
        --idx;
      }
    }

    // optional: word alignment, offset by the start of each phrase pair
    if (withWordAlignment) {
      out << " ||| ";
      idx = numEdges - 2;
      while (idx >= 0) {
        const Hypothesis &hypo = *edges[idx];
        const WordsRange &srcSpan = hypo.GetCurrSourceWordsRange();
        WordsRange tgtSpan = path.GetTargetWordsRange(hypo);
        const int srcStart = srcSpan.GetStartPos();
        const int tgtStart = tgtSpan.GetStartPos();
        const AlignmentInfo &alignment = hypo.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, alignment, srcStart, tgtStart);
        --idx;
      }
    }

    // optional: input, printed from the edge at index 0
    if (config.IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << std::endl;
  }

  out << std::flush;
}
/**
 * Write every path of an n-best list to `out`, one line per path, taking the
 * score producers from the given translation system.
 *
 * Fields on a line are separated by " ||| ": the translation id, the surface
 * output of all edges, the feature scores (followed by the phrase dictionary
 * and generation dictionary scores) and the total score. Depending on the
 * StaticData settings the phrase segmentation, the word alignment and the
 * input (path recovery) are appended as further fields.
 *
 * @param out                stream receiving the n-best list
 * @param nBestList          paths to print
 * @param outputFactorOrder  forwarded to OutputSurface
 * @param system             supplies the phrase and generation dictionaries;
 *                           dereferenced without a null check
 * @param translationId      id printed as the first field of every line
 * @param reportSegmentation forwarded to OutputSurface
 */
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  const bool labeledOutput = staticData.IsLabeledNBestList();
  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  const bool includeSegmentation = staticData.NBestIncludesSegmentation();
  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation; edges are walked from the
    // last index down to index 0
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores( out, system, path );

    // translation components
    const std::vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    for (size_t i = 0; i < pds.size(); i++) {
      const size_t numInputScores = pds[i]->GetNumInputScores();
      const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
      for (size_t j = 0; j < scores.size(); ++j) {
        // Labels are only written for the first dictionary, once in front of
        // score 0 and once in front of the score at index numInputScores.
        if (labeledOutput && i == 0 && (j == 0 || j == numInputScores)) {
          out << " " << pds[i]->GetScoreProducerWeightShortName(j) << ":";
        }
        out << " " << scores[j];
      }
    }

    // generation
    const std::vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    for (size_t i = 0; i < gds.size(); i++) {
      const size_t numInputScores = gds[i]->GetNumInputScores();
      const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
      for (size_t j = 0; j < scores.size(); ++j) {
        // same labelling rule as for the phrase dictionaries above
        if (labeledOutput && i == 0 && (j == 0 || j == numInputScores)) {
          out << " " << gds[i]->GetScoreProducerWeightShortName(j) << ":";
        }
        out << " " << scores[j];
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase segmentation, "src[-srcEnd]=tgt[-tgtEnd]".
    // The edge at the highest index is skipped here.
    // NOTE(review): presumably that edge is the empty initial hypothesis -- confirm.
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        // the end position is only printed for spans longer than one word
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    // word alignment, offset by the start of each phrase pair
    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);
      }
    }

    if (staticData.IsPathRecoveryEnabled()) {
      // Use the same space-padded delimiter as every other field; a bare
      // "|||" glued the previous field to the delimiter and could not be
      // split on " ||| ".
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << std::endl;
  }

  out << std::flush;
}
Beispiel #3
0
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   char reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();

    std::map<size_t, const Factor*> placeholders;
    if (placeholderFactor != NOT_FOUND) {
      // creates map of target position -> factor for placeholders
      placeholders = GetPlaceholders(edge, placeholderFactor);
    }

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholders.size()) {
        // do placeholders
        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
        if (iter != placeholders.end()) {
          factor = iter->second;
        }
      }

      CHECK(factor);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
        out << "UNK" << *factor;
      } else {
        out << *factor;
      }

      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace ("report segmentation") option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;    // enriched "-tt"
    if (reportSegmentation == 2) {
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      OutputAllFeatureScores(scoreBreakdown, out);
    }
    out << "| ";
  }
}
/**
 * Format a k-best list and hand it to the output collector as one string.
 *
 * Each derivation becomes one line: translation id, target-side yield with
 * the first and last word removed, feature scores and total score, separated
 * by " ||| ". Word alignments and the output tree are appended when the
 * corresponding StaticData options are set.
 *
 * @param collector      receives the formatted list; must not be null
 * @param nBestList      derivations to print
 * @param translationId  id printed on every line and passed to the collector
 *
 * UTIL_THROW_IF2 fires when a derivation's output phrase has fewer than two
 * words.
 */
void Manager::OutputNBestList(OutputCollector *collector,
                              const KBestExtractor::KBestVec &nBestList,
                              long translationId) const
{
  // Validate the collector before its first use; the check used to sit after
  // the pointer had already been dereferenced.
  assert(collector);

  const StaticData &staticData = StaticData::Instance();

  const std::vector<FactorType> &outputFactorOrder =
    staticData.GetOutputFactorOrder();

  std::ostringstream out;

  if (collector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    FixPrecision(out);
  }

  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  const bool printNBestTrees = staticData.PrintNBestTrees();

  for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
        "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments as "first-second " pairs
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // plain newline: flushing an in-memory string stream has no effect
    out << '\n';
  }

  collector->Write(translationId, out.str());
}