Ejemplo n.º 1
0
void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
{
  ostringstream out;
  OutputAlignment(out, edges);

  collector->Write(lineNo,out.str());
}
Ejemplo n.º 2
0
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   char reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const Phrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor().second;

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholderFactor != NOT_FOUND) {
        const Factor *origFactor = phrase.GetFactor(pos, placeholderFactor);
        if (origFactor) {
          factor = origFactor;
        }
      }
      CHECK(factor);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
	out << "UNK" << *factor;
      }
      else {
	out << *factor;
      }
      
      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;
    // enriched "-tt"
    if (reportSegmentation == 2) {
      out << ",0, ";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
    }
    out << "| ";
  }
}
Ejemplo n.º 3
0
void OutputAlignment(std::ostream &out, const Moses::Hypothesis *hypo)
{
  std::vector<const Hypothesis *> edges;
  const Hypothesis *currentHypo = hypo;
  while (currentHypo) {
    edges.push_back(currentHypo);
    currentHypo = currentHypo->GetPrevHypo();
  }

  OutputAlignment(out, edges);

}
Ejemplo n.º 4
0
void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
{
  if (collector) {
    std::vector<const Hypothesis *> edges;
    const Hypothesis *currentHypo = hypo;
    while (currentHypo) {
      edges.push_back(currentHypo);
      currentHypo = currentHypo->GetPrevHypo();
    }

    OutputAlignment(collector,lineNo, edges);
  }
}
Ejemplo n.º 5
0
void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
  size_t targetOffset = 0;

  for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
    const Hypothesis &edge = *edges[currEdge];
    const TargetPhrase &tp = edge.GetCurrTargetPhrase();
    size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();

    OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);

    targetOffset += tp.GetSize();
  }
  out << std::endl;
}
Ejemplo n.º 6
0
void OutputNBest(std::ostream& out
                 , const Moses::TrellisPathList &nBestList
                 , const std::vector<Moses::FactorType>& outputFactorOrder
                 , long translationId
                 , char reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores(path.GetScoreBreakdown(), out );

    // total
    out << " ||| " << path.GetTotalScore();

    //phrase-to-phrase segmentation
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);

      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
Ejemplo n.º 7
0
void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
{
  if (collector) {
    OutputAlignment(collector,lineNo, path.GetEdges());
  }
}
Ejemplo n.º 8
0
/***
 * print surface factor only for the given phrase
 */
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
                   char reportSegmentation, bool reportAllFactors)
{
  CHECK(outputFactorOrder.size() > 0);
  const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
  bool markUnknown = StaticData::Instance().GetMarkUnknown();
  if (reportAllFactors == true) {
    out << phrase;
  } else {
    FactorType placeholderFactor = StaticData::Instance().GetPlaceholderFactor();

    std::map<size_t, const Factor*> placeholders;
    if (placeholderFactor != NOT_FOUND) {
      // creates map of target position -> factor for placeholders
      placeholders = GetPlaceholders(edge, placeholderFactor);
    }

    size_t size = phrase.GetSize();
    for (size_t pos = 0 ; pos < size ; pos++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);

      if (placeholders.size()) {
        // do placeholders
        std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
        if (iter != placeholders.end()) {
          factor = iter->second;
        }
      }

      CHECK(factor);

      //preface surface form with UNK if marking unknowns
      const Word &word = phrase.GetWord(pos);
      if(markUnknown && word.IsOOV()) {
        out << "UNK" << *factor;
      } else {
        out << *factor;
      }

      for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
        const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
        CHECK(factor);

        out << "|" << *factor;
      }
      out << " ";
    }
  }

  // trace ("report segmentation") option "-t" / "-tt"
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;    // enriched "-tt"
    if (reportSegmentation == 2) {
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      OutputAllFeatureScores(scoreBreakdown, out);
    }
    out << "| ";
  }
}
Ejemplo n.º 9
0
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
{
  const StaticData &staticData = StaticData::Instance();
  bool labeledOutput = staticData.IsLabeledNBestList();
  bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  bool includeSegmentation = staticData.NBestIncludesSegmentation();
  bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
    }
    out << " |||";

    // print scores with feature names
    OutputAllFeatureScores( out, system, path );
    string lastName;

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {

      for( size_t i=0; i<pds.size(); i++ ) {
	size_t pd_numinputscore = pds[i]->GetNumInputScores();
	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
	for (size_t j = 0; j<scores.size(); ++j){

	  if (labeledOutput && (i == 0) ){
	    if ((j == 0) || (j == pd_numinputscore)){
	      lastName =  pds[i]->GetScoreProducerWeightShortName(j);
	      out << " " << lastName << ":";
	    }
	  }
	  out << " " << scores[j];
	}
      }
    }

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {

      for( size_t i=0; i<gds.size(); i++ ) {
	size_t pd_numinputscore = gds[i]->GetNumInputScores();
	vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
	for (size_t j = 0; j<scores.size(); ++j){

	  if (labeledOutput && (i == 0) ){
	    if ((j == 0) || (j == pd_numinputscore)){
	      lastName =  gds[i]->GetScoreProducerWeightShortName(j);
	      out << " " << lastName << ":";
	    }
	  }
	  out << " " << scores[j];
	}
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    //phrase-to-phrase segmentation
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }

    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
        
        OutputAlignment(out, ai, sourceOffset, targetOffset);

      }
    }

    if (StaticData::Instance().IsPathRecoveryEnabled()) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}