Example #1
void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const ChartHypothesis *bestHypo, const TranslationSystem* system, long translationId)
{
  std::ostringstream out;

  // Check if we're writing to std::cout.
  if (m_nBestOutputCollector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapper::FixPrecision(out);

    // The output from -output-hypo-score is always written to std::cout.
    if (StaticData::Instance().GetOutputHypoScore()) {
      if (bestHypo != NULL) {
        out << bestHypo->GetTotalScore() << " ";
      } else {
        out << "0 ";
      }
    }
  }

  bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
  //bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();

  ChartTrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const ChartTrellisPath &path = **iter;
    //cerr << path << endl << endl;

    Moses::Phrase outputPhrase = path.GetOutputPhrase();

    // delete 1st & last
    CHECK(outputPhrase.GetSize() >= 2);
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the surface factor of the translation
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";

    // print the scores in a hardwired order
    // before each model type, the corresponding command-line-like name must be emitted
    // MERT script relies on this

    // lm
    const LMList& lml = system->GetLanguageModels();
    if (lml.size() > 0) {
      if (labeledOutput)
        out << "lm:";
      LMList::const_iterator lmi = lml.begin();
      for (; lmi != lml.end(); ++lmi) {
        out << " " << path.GetScoreBreakdown().GetScoreForProducer(*lmi);
      }
    }

    std::string lastName = "";

    // output stateful sparse features
    const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
    for (size_t i = 0; i < sff.size(); i++) {
      if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited) {
        OutputSparseFeatureScores(out, path, sff[i], lastName);
      }
    }

    // translation components
    const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
    if (pds.size() > 0) {
      for (size_t i = 0; i < pds.size(); i++) {
        size_t pd_numinputscore = pds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(pds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = pds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // word penalty
    if (labeledOutput)
      out << " w:";
    out << " " << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer());

    // generation
    const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      for (size_t i = 0; i < gds.size(); i++) {
        size_t pd_numinputscore = gds[i]->GetNumInputScores();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(gds[i]);
        for (size_t j = 0; j < scores.size(); ++j) {
          if (labeledOutput && (i == 0)) {
            if ((j == 0) || (j == pd_numinputscore)) {
              lastName = gds[i]->GetScoreProducerWeightShortName(j);
              out << " " << lastName << ":";
            }
          }
          out << " " << scores[j];
        }
      }
    }

    // output stateless sparse features
    lastName = "";

    const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
    for (size_t i = 0; i < slf.size(); i++) {
      if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited) {
        OutputSparseFeatureScores(out, path, slf[i], lastName);
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    /*
    if (includeAlignment) {
      *m_nBestStream << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const ChartHypothesis &edge = *edges[currEdge];
        WordsRange sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = edge.GetCurrTargetWordsRange();
        *m_nBestStream << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          *m_nBestStream << "-" << sourceRange.GetEndPos();
        }
        *m_nBestStream << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          *m_nBestStream << "-" << targetRange.GetEndPos();
        }
      }
    }
    */

    out << endl;
  }

  out << std::flush;

  CHECK(m_nBestOutputCollector);
  m_nBestOutputCollector->Write(translationId, out.str());
}
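
For orientation: each pass through the loop above writes one n-best entry in Moses' usual "translationId ||| surface ||| feature scores ||| total" layout, and the score labels (lm:, w:, plus whatever GetScoreProducerWeightShortName returns for the phrase tables and generation models) appear only when IsLabeledNBestList() is true. A labeled line might look roughly like this (hypothetical sentence and scores, shown only to illustrate the layout; "tm:" stands in for the short name the first phrase table reports):

0 ||| this is a small house ||| lm: -41.2 tm: 0.3 -1.7 -0.9 w: -5 ||| -7.842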
Example #2
void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, const MosesChart::Hypothesis *bestHypo, const TranslationSystem* system, long translationId)
{
  std::ostringstream out;

  // Check if we're writing to std::cout.
  if (m_surpressSingleBestOutput)
  {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapper::FixPrecision(out);

    // The output from -output-hypo-score is always written to std::cout.
    if (StaticData::Instance().GetOutputHypoScore())
    {
      if (bestHypo != NULL)
      {
        out << bestHypo->GetTotalScore() << " "
            << MosesChart::Hypothesis::GetHypoCount() << " ";
      }
      else
      {
        out << "0 ";
      }
    }
  }

  bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
  //bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();

  MosesChart::TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const MosesChart::TrellisPath &path = **iter;
    //cerr << path << endl << endl;

    Moses::Phrase outputPhrase = path.GetOutputPhrase();

    // delete 1st & last
    assert(outputPhrase.GetSize() >= 2);
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the surface factor of the translation
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";

    // print the scores in a hardwired order
    // before each model type, the corresponding command-line-like name must be emitted
    // MERT script relies on this

    // lm
    const LMList& lml = system->GetLanguageModels();
    if (lml.size() > 0) {
      if (labeledOutput)
        out << "lm: ";
      LMList::const_iterator lmi = lml.begin();
      for (; lmi != lml.end(); ++lmi) {
        out << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
      }
    }

    // translation components
    if (StaticData::Instance().GetInputType() == SentenceInput) {
      // translation components for text input
      vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
      if (pds.size() > 0) {
        if (labeledOutput)
          out << "tm: ";
        vector<PhraseDictionaryFeature*>::iterator iter;
        for (iter = pds.begin(); iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
          for (size_t j = 0; j < scores.size(); ++j)
            out << scores[j] << " ";
        }
      }
    } else {
      // translation components for Confusion Network input
      // first translation component has GetNumInputScores() scores from the input Confusion Network
      // at the beginning of the vector
      vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
      if (pds.size() > 0) {
        vector<PhraseDictionaryFeature*>::iterator iter;

        iter = pds.begin();
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);

        size_t pd_numinputscore = (*iter)->GetNumInputScores();

        if (pd_numinputscore) {
          if (labeledOutput)
            out << "I: ";

          for (size_t j = 0; j < pd_numinputscore; ++j)
            out << scores[j] << " ";
        }

        for (iter = pds.begin() ; iter != pds.end(); ++iter) {
          vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);

          size_t pd_numinputscore = (*iter)->GetNumInputScores();

          if (iter == pds.begin() && labeledOutput)
            out << "tm: ";
          for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
            out << scores[j] << " ";
        }
      }
    }

    // word penalty
    if (labeledOutput)
      out << "w: ";
    out << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer()) << " ";

    // generation
    const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
    if (gds.size() > 0) {
      if (labeledOutput)
        out << "g: ";
      vector<GenerationDictionary*>::const_iterator iter;
      for (iter = gds.begin(); iter != gds.end(); ++iter) {
        vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
        for (size_t j = 0; j < scores.size(); j++) {
          out << scores[j] << " ";
        }
      }
    }

    // total
    out << "||| " << path.GetTotalScore();

    /*
    if (includeAlignment) {
      *m_nBestStream << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const MosesChart::Hypothesis &edge = *edges[currEdge];
        WordsRange sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = edge.GetCurrTargetWordsRange();
        *m_nBestStream << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          *m_nBestStream << "-" << sourceRange.GetEndPos();
        }
        *m_nBestStream << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          *m_nBestStream << "-" << targetRange.GetEndPos();
        }
      }
    }
    */

    out << endl;
  }

  out << std::flush;

  assert(m_nBestOutputCollector);
  m_nBestOutputCollector->Write(translationId, out.str());
}
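
Both versions end the same way: the whole n-best block for one input sentence is built up in a local std::ostringstream and handed to m_nBestOutputCollector->Write(translationId, out.str()) in a single call, which is what lets Moses keep n-best output in source-sentence order when several sentences are decoded concurrently. A rough sketch of that buffering idea follows (a hypothetical stand-in, not the actual Moses OutputCollector API):

#include <iostream>
#include <map>
#include <string>

// Hypothetical ordered collector: buffers each sentence's output block by its
// id and prints a block only once every earlier id has been written.
class SimpleOrderedCollector {
public:
  explicit SimpleOrderedCollector(std::ostream &os) : m_os(os), m_next(0) {}

  void Write(long id, const std::string &block) {
    m_pending[id] = block;
    // Flush every buffered block whose turn has come.
    std::map<long, std::string>::iterator it;
    while ((it = m_pending.find(m_next)) != m_pending.end()) {
      m_os << it->second;
      m_pending.erase(it);
      ++m_next;
    }
  }

private:
  std::ostream &m_os;                     // final destination (e.g. std::cout or the n-best file)
  long m_next;                            // next id that is allowed to print
  std::map<long, std::string> m_pending;  // blocks waiting for their turn
};

A real collector would also guard Write with a mutex so decoder threads can call it concurrently; the point here is only the buffer-then-reorder pattern behind the Write(translationId, out.str()) calls above.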