Пример #1
0
/**
 * Decode the current sentence with the phrase-based manager and harvest
 * per-hypothesis statistics from the resulting n-best list.
 *
 * For every n-best entry one element is appended to each of featureValues,
 * bleuScores and modelScores (the vectors are not cleared first). After its
 * scores have been recorded, the BLEU entry of the stored feature vector is
 * reset to 0 via setBleuScore().
 *
 * A line per hypothesis is written to cerr (rank, epoch, target phrase,
 * model score, BLEU, total score).
 *
 * @param source                  not referenced in this body
 * @param sentenceid              not referenced in this body
 * @param nBestSize               size passed to Manager::CalcNBest
 * @param bleuObjectiveWeight     multiplied with bleuScoreWeight and the BLEU
 *                                score to obtain the amount subtracted from the
 *                                total score
 * @param bleuScoreWeight         see bleuObjectiveWeight
 * @param featureValues           out: score breakdown of each n-best entry
 * @param bleuScores              out: BLEU score of each n-best entry
 * @param modelScores             out: total score minus the weighted BLEU term
 * @param numReturnedTranslations upper bound on the number of translations returned
 * @param realBleu                if true, BLEU comes from
 *                                m_bleuScoreFeature->CalculateBleu(); otherwise
 *                                it is read from the feature vector
 * @param distinct                forwarded to Manager::CalcNBest
 * @param rank                    only used in the log line
 * @param epoch                   only used in the log line
 * @param search                  not referenced in this body
 * @param filename                not referenced (n-best file output is commented out)
 * @return the first min(numReturnedTranslations, n-best size) translations, each
 *         as a vector of Word copies allocated with new. This function never
 *         deletes them, so presumably the caller must -- TODO confirm.
 */
vector< vector<const Word*> > MosesDecoder::runDecoder(const std::string& source,
    size_t sentenceid,
    size_t nBestSize,
    float bleuObjectiveWeight,
    float bleuScoreWeight,
    vector< ScoreComponentCollection>& featureValues,
    vector< float>& bleuScores,
    vector< float>& modelScores,
    size_t numReturnedTranslations,
    bool realBleu,
    bool distinct,
    size_t rank,
    size_t epoch,
    SearchAlgorithm& search,
    string filename)
{
  // run the decoder
  // NOTE(review): m_sentence is dereferenced without a check and the manager is
  // not released here; presumably both are handled elsewhere in the class.
  m_manager = new Moses::Manager(*m_sentence);
  m_manager->Decode();
  TrellisPathList nBestList;
  m_manager->CalcNBest(nBestSize, nBestList, distinct);

  // optionally print nbest to file (to extract scores and features.. currently just for sentence bleu scoring)
  /*if (filename != "") {
    ofstream out(filename.c_str());
    if (!out) {
      ostringstream msg;
      msg << "Unable to open " << filename;
      throw runtime_error(msg.str());
    }
    // TODO: handle sentence id (for now always 0)
    //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(), 0, false);
    out.close();
  }*/

  // read off the feature values and bleu scores for each sentence in the nbest list
  Moses::TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const Moses::TrellisPath &path = **iter;
    featureValues.push_back(path.GetScoreBreakdown());

    // The dynamic score is always read from the feature vector: the log line
    // below prints it when realBleu is set, so leaving it unassigned in that
    // case would stream an indeterminate value.
    const float dynBleuScore = getBleuScore(featureValues.back());
    // The real score is only computed on request; it is zero-initialised so it
    // never holds an indeterminate value.
    float realBleuScore = 0.0f;
    if (realBleu) realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetTargetPhrase());
    const float bleuScore = realBleu ? realBleuScore : dynBleuScore;
    bleuScores.push_back(bleuScore);

    //std::cout << "Score breakdown: " << path.GetScoreBreakdown() << endl;
    float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
    modelScores.push_back(scoreWithoutBleu);

    // separate consecutive log entries with a newline (none before the first)
    if (iter != nBestList.begin())
      cerr << endl;
    cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetTargetPhrase() << "\", score: "
         << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore();
    if (m_bleuScoreFeature->Enabled() && realBleu)
      cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";

    // set bleu score to zero in the feature vector since we do not want to optimise its weight
    setBleuScore(featureValues.back(), 0);
  }

  // prepare translations to return
  vector< vector<const Word*> > translations;
  for (size_t i=0; i < numReturnedTranslations && i < nBestList.GetSize(); ++i) {
    const TrellisPath &path = nBestList.at(i);
    Phrase phrase = path.GetTargetPhrase();

    // deep-copy every word so the result does not point into the n-best list
    vector<const Word*> translation;
    for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
      const Word &word = phrase.GetWord(pos);
      Word *newWord = new Word(word);
      translation.push_back(newWord);
    }
    translations.push_back(translation);
  }

  return translations;
}
Пример #2
0
/**
 * Decode the current sentence with the chart manager and harvest
 * per-derivation statistics from the k-best list.
 *
 * For each derivation one element is appended to featureValues, bleuScores
 * and modelScores; afterwards the BLEU entry of the stored feature vector is
 * reset to 0 via setBleuScore(). A log line per derivation goes to cerr.
 *
 * @param nBestSize            size passed to ChartManager::CalcNBest
 * @param bleuObjectiveWeight  multiplied with bleuScoreWeight and the BLEU score
 *                             to obtain the amount subtracted from derivation.score
 * @param bleuScoreWeight      see bleuObjectiveWeight
 * @param featureValues        out: score breakdown of each derivation
 * @param bleuScores           out: selected BLEU score of each derivation
 * @param modelScores          out: derivation score minus the weighted BLEU term
 * @param realBleu             selects the CalculateBleu() result over the value
 *                             read from the feature vector
 * @param distinct             forwarded to ChartManager::CalcNBest
 * @param rank, epoch          only used in the log line
 * @return one translation per derivation, each a vector of Word copies
 *         allocated with new (never deleted here).
 *
 * NOTE(review): source, sentenceid and numReturnedTranslations are not
 * referenced in this body; every k-best entry is returned.
 */
vector< vector<const Word*> > MosesDecoder::runChartDecoder(const std::string& source,
    size_t sentenceid,
    size_t nBestSize,
    float bleuObjectiveWeight,
    float bleuScoreWeight,
    vector< ScoreComponentCollection>& featureValues,
    vector< float>& bleuScores,
    vector< float>& modelScores,
    size_t numReturnedTranslations,
    bool realBleu,
    bool distinct,
    size_t rank,
    size_t epoch)
{
  typedef ChartKBestExtractor::KBestVec::const_iterator DerivationIter;

  // decode and extract the k-best derivations
  m_chartManager = new ChartManager(*m_sentence);
  m_chartManager->Decode();
  ChartKBestExtractor::KBestVec kBest;
  m_chartManager->CalcNBest(nBestSize, kBest, distinct);

  const DerivationIter first = kBest.begin();
  const DerivationIter last = kBest.end();

  // pass 1: record features, BLEU and model score of every derivation
  for (DerivationIter it = first; it != last; ++it) {
    const ChartKBestExtractor::Derivation &deriv = **it;
    featureValues.push_back(*ChartKBestExtractor::GetOutputScoreBreakdown(deriv));

    // both BLEU flavours are computed; realBleu picks which one is reported
    const float dynBleuScore = getBleuScore(featureValues.back());
    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(deriv);
    const float realBleuScore = m_bleuScoreFeature->CalculateBleu(outputPhrase);

    float bleuScore;
    if (realBleu) {
      bleuScore = realBleuScore;
    } else {
      bleuScore = dynBleuScore;
    }
    bleuScores.push_back(bleuScore);

    const float scoreWithoutBleu = deriv.score - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
    modelScores.push_back(scoreWithoutBleu);

    // newline between log entries, none before the first one
    if (it != first)
      cerr << endl;
    cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << outputPhrase << "\", score: ";
    cerr << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << deriv.score;
    if (m_bleuScoreFeature->Enabled() && realBleu) {
      cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";
    }

    // the BLEU weight must not be optimised, so blank its feature value
    setBleuScore(featureValues.back(), 0);
  }

  // pass 2: deep-copy the output words of every derivation
  vector< vector<const Word*> > translations;
  for (DerivationIter it = first; it != last; ++it) {
    Phrase phrase = ChartKBestExtractor::GetOutputPhrase(**it);

    translations.push_back(vector<const Word*>());
    vector<const Word*> &words = translations.back();
    for (size_t pos = 0, len = phrase.GetSize(); pos < len; ++pos) {
      words.push_back(new Word(phrase.GetWord(pos)));
    }
  }

  return translations;
}
  /**
   * Decode the current sentence with the chart manager of the given
   * translation system and harvest per-path statistics from the n-best list.
   *
   * For each path one element is appended to featureValues, bleuScores and
   * modelScores; afterwards the BLEU entry of the stored feature vector is
   * reset to 0 via setBleuScore(). A log line per path goes to cerr.
   *
   * @param nBestSize            size passed to ChartManager::CalcNBest
   * @param bleuObjectiveWeight  multiplied with bleuScoreWeight and the BLEU
   *                             score to obtain the amount subtracted from the
   *                             path's total score
   * @param bleuScoreWeight      see bleuObjectiveWeight
   * @param featureValues        out: score breakdown of each path
   * @param bleuScores           out: selected BLEU score of each path
   * @param modelScores          out: total score minus the weighted BLEU term
   * @param realBleu             selects the CalculateBleu() result over the
   *                             value read from the feature vector
   * @param distinct             forwarded to ChartManager::CalcNBest
   * @param rank, epoch          only used in the log line
   * @param system               its address is handed to the ChartManager constructor
   * @return one translation per path, each a vector of Word copies allocated
   *         with new (never deleted here).
   *
   * NOTE(review): source, sentenceid and numReturnedTranslations are not
   * referenced in this body; every n-best entry is returned.
   */
  vector< vector<const Word*> > MosesDecoder::runChartDecoder(const std::string& source,
                              size_t sentenceid,
                              size_t nBestSize,
                              float bleuObjectiveWeight,
                              float bleuScoreWeight,
                              vector< ScoreComponentCollection>& featureValues,
                              vector< float>& bleuScores,
                              vector< float>& modelScores,
                              size_t numReturnedTranslations,
                              bool realBleu,
                              bool distinct,
                              size_t rank,
                              size_t epoch,
                              const TranslationSystem& system) {
    typedef ChartTrellisPathList::const_iterator PathIter;

    // decode and extract the n-best paths
    m_chartManager = new ChartManager(*m_sentence, &system);
    m_chartManager->ProcessSentence();
    ChartTrellisPathList paths;
    m_chartManager->CalcNBest(nBestSize, paths, distinct);

    // pass 1: record features, BLEU and model score of every path
    for (PathIter it = paths.begin(); it != paths.end(); ++it) {
      const Moses::ChartTrellisPath &path = **it;
      featureValues.push_back(path.GetScoreBreakdown());

      // both BLEU flavours are computed; realBleu picks which one is reported
      const float dynBleuScore = getBleuScore(featureValues.back());
      const float realBleuScore = m_bleuScoreFeature->CalculateBleu(path.GetOutputPhrase());

      float bleuScore;
      if (realBleu) {
        bleuScore = realBleuScore;
      } else {
        bleuScore = dynBleuScore;
      }
      bleuScores.push_back(bleuScore);

      const float scoreWithoutBleu = path.GetTotalScore() - (bleuObjectiveWeight * bleuScoreWeight * bleuScore);
      modelScores.push_back(scoreWithoutBleu);

      // newline between log entries, none before the first one
      if (it != paths.begin()) {
        cerr << endl;
      }
      cerr << "Rank " << rank << ", epoch " << epoch << ", \"" << path.GetOutputPhrase() << "\", score: ";
      cerr << scoreWithoutBleu << ", Bleu: " << bleuScore << ", total: " << path.GetTotalScore();
      if (m_bleuScoreFeature->Enabled() && realBleu) {
        cerr << " (d-bleu: " << dynBleuScore << ", r-bleu: " << realBleuScore << ") ";
      }

      // the BLEU weight must not be optimised, so blank its feature value
      setBleuScore(featureValues.back(), 0);
    }

    // pass 2: deep-copy the output words of every path
    vector< vector<const Word*> > translations;
    for (PathIter it = paths.begin(); it != paths.end(); ++it) {
      const ChartTrellisPath &path = **it;
      Phrase phrase = path.GetOutputPhrase();

      translations.push_back(vector<const Word*>());
      vector<const Word*> &words = translations.back();
      for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
        words.push_back(new Word(phrase.GetWord(pos)));
      }
    }

    return translations;
  }